| From 943a7f434b10c19f8e8e865c3cc40685b9903822 Mon Sep 17 00:00:00 2001 |
| From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> |
| Date: Thu, 24 Mar 2022 17:32:43 -0300 |
| Subject: [PATCH 1/6] Provide a maximum job length depending on the |
| virtualization |
| |
| Identify if a system is running on baremetal or PowerVM and provide |
| a maximum job length adapted to each case. |
| |
| Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> |
| |
| lib/nx_inflate.c | 5 +++-- |
| lib/nx_zlib.c | 25 +++++++++++++++++++++++-- |
| lib/nx_zlib.h | 4 ++++ |
| 3 files changed, 30 insertions(+), 4 deletions(-) |
| |
| diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c |
| index ec32b4c..77ad33c 100644 |
| |
| |
| @@ -945,8 +945,9 @@ static int nx_inflate_(nx_streamp s, int flush) |
| uint32_t write_sz, source_sz, target_sz; |
| long loop_cnt = 0, loop_max = 0xffff; |
| |
| - /* inflate benefits from large jobs; memcopies must be amortized */ |
| - uint32_t inflate_per_job_len = 64 * nx_config.per_job_len; |
| + /** \brief inflate benefits from large jobs; memcopies must be |
| + * amortized. */ |
| + const uint32_t inflate_per_job_len = nx_config.per_job_len; |
| |
| /* nx hardware */ |
| uint32_t sfbt = 0, subc = 0, spbc, tpbc, nx_ce, fc; |
| diff --git a/lib/nx_zlib.c b/lib/nx_zlib.c |
| index 28ea482..a50d6f7 100644 |
| |
| |
| @@ -64,6 +64,18 @@ |
| #include "nx_utils.h" |
| #include "nx_zlib.h" |
| |
| +/* Use the following values as maximum length of NX jobs when the OS doesn't |
| + provide the value itself, which is the default behavior until Linux 5.17 */ |
| + |
| +/** \brief Maximum job length on baremetal |
| + * |
| + * While the system does allow up-to 2 GiB as the maximum job length, restrict |
| + * it to 64 MiB. |
| + */ |
| +#define DEFAULT_MAX_JOB_BAREMETAL 64 * 1024 * 1024 |
| +/** \brief Maximum job length on PowerVM */ |
| +#define DEFAULT_MAX_JOB_POWERVM 1024 * 1024 |
| + |
| struct nx_config_t nx_config; |
| static struct nx_dev_t nx_devices[NX_DEVICES_MAX]; |
| static int nx_dev_count = 0; |
| @@ -639,8 +651,13 @@ static int nx_query_job_limits() |
| } |
| } |
| |
| - /* On error return default value of 1 MB */ |
| - return (1024 * 1024); |
| + /* On error return default value. */ |
| + switch (nx_config.virtualization) { |
| + case BAREMETAL: |
| + return DEFAULT_MAX_JOB_BAREMETAL; |
| + default: |
| + return DEFAULT_MAX_JOB_POWERVM; |
| + } |
| } |
| |
| /* |
| @@ -659,6 +676,9 @@ static int nx_enumerate_engines() |
| int count = 0; |
| size_t n; |
| |
| + /* Assume baremetal by default. */ |
| + nx_config.virtualization = BAREMETAL; |
| + |
| d = opendir(DEVICE_TREE); |
| if (d == NULL){ |
| prt_err("open device tree dir failed.\n"); |
| @@ -712,6 +732,7 @@ static int nx_enumerate_engines() |
| } |
| /* On PowerVM, there is no concept of multiple NX engines. */ |
| if (strncmp(de->d_name, "ibm,powervm", 11) == 0){ |
| + nx_config.virtualization = POWERVM; |
| closedir(d); |
| return 1; |
| } |
| diff --git a/lib/nx_zlib.h b/lib/nx_zlib.h |
| index e84bd7e..fa73b01 100644 |
| |
| |
| @@ -129,6 +129,8 @@ void nx_print_dde(nx_dde_t *ddep, const char *msg); |
| #define zlib_version zlibVersion() |
| extern const char *zlibVersion OF((void)); |
| |
| +enum virtualization {BAREMETAL=0, POWERVM=1}; |
| + |
| /* common config variables for all streams */ |
| struct nx_config_t { |
| long page_sz; |
| @@ -158,6 +160,8 @@ struct nx_config_t { |
| * dynamic huffman */ |
| struct selector mode; /** mode selector: selects between software |
| * and hardware compression. */ |
| + uint8_t virtualization; /** Indicate the virtualization type being |
| + * used. */ |
| }; |
| typedef struct nx_config_t *nx_configp_t; |
| extern struct nx_config_t nx_config; |
| |
| From b22eb7bffe61e36f70661921a689e44370d3c7e5 Mon Sep 17 00:00:00 2001 |
| From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> |
| Date: Thu, 24 Mar 2022 18:03:28 -0300 |
| Subject: [PATCH 2/6] inflate: Move code that initializes the DDE to their own |
| functions |
| |
| Create functions nx_reset_dde() and nx_init_dde() based on previous code, |
| helping to reduce the size of nx_inflate_() and making the code easier to |
| understand. |
| |
| Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> |
| |
| lib/nx_inflate.c | 197 ++++++++++++++++++++++++++++------------------- |
| 1 file changed, 116 insertions(+), 81 deletions(-) |
| |
| diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c |
| index 77ad33c..f1d9adb 100644 |
| |
| |
| @@ -934,6 +934,120 @@ static int copy_data_to_fifo_in(nx_streamp s) { |
| return Z_OK; |
| } |
| |
| +/** \brief Reset DDE to initial values. |
| + * |
| + * @param s nx_streamp to be processed. |
| + * @return Function code as passed to CRB. The function will set the CRB and |
| + * return the value here. |
| + */ |
| +static int nx_reset_dde(nx_streamp s) { |
| + nx_gzip_crb_cpb_t *cmdp = s->nxcmdp; |
| + uint32_t fc; |
| + |
| + /* address/len lists */ |
| + clearp_dde(s->ddl_in); |
| + clearp_dde(s->ddl_out); |
| + |
| + /* FC, CRC, HistLen, Table 6-6 */ |
| + if (s->resuming || (s->dict_len > 0)) { |
| + /* Resuming a partially decompressed input. */ |
| + fc = GZIP_FC_DECOMPRESS_RESUME; |
| + } else { |
| + /* First decompress job */ |
| + fc = GZIP_FC_DECOMPRESS; |
| + |
| + /* We use the most recently measured compression ratio |
| + as a heuristic to estimate the input and output |
| + sizes. If we give too much input, the target buffer |
| + overflows and NX cycles are wasted, and then we |
| + must retry with smaller input size. 1000 is 100% */ |
| + s->last_comp_ratio = 1000UL; |
| + } |
| + |
| + /* clear then copy fc to the crb */ |
| + cmdp->crb.gzip_fc = 0; |
| + putnn(cmdp->crb, gzip_fc, fc); |
| + |
| + return fc; |
| +} |
| + |
| +/** \brief Initialize DDE, appending a dictionary, if necessary. |
| + * |
| + * @param s nx_streamp to be processed. |
| + * @return The history length |
| + */ |
| +static int nx_init_dde(nx_streamp s) { |
| + nx_gzip_crb_cpb_t *cmdp = s->nxcmdp; |
| + int nx_history_len = s->history_len; |
| + |
| + /* FC, CRC, HistLen, Table 6-6 */ |
| + if (s->resuming || (s->dict_len > 0)) { |
| + /* Resuming a partially decompressed input. The key |
| + to resume is supplying the max 32KB dictionary |
| + (history) to NX, which is basically the last 32KB |
| + or less of the output earlier produced. And also |
| + make sure partial checksums are carried forward |
| + */ |
| + |
| + /* Crc of prev job passed to the job to be resumed */ |
| + put32(cmdp->cpb, in_crc, s->crc32); |
| + put32(cmdp->cpb, in_adler, s->adler32); |
| + |
| + /* Round up the sizes to quadword. Section 2.10 |
| + Rounding up will not segfault because |
| + nx_alloc_buffer has padding at the beginning */ |
| + |
| + if (s->dict_len > 0) { |
| + /* lays dict on top of hist */ |
| + nx_history_len = nx_amend_history_with_dict(s); |
| + |
| + if (s->wrap == HEADER_ZLIB) { |
| + /* in the raw mode pass crc as is; in the zlib |
| + mode initialize them */ |
| + put32(cmdp->cpb, in_crc, INIT_CRC ); |
| + put32(cmdp->cpb, in_adler, INIT_ADLER); |
| + put32(cmdp->cpb, out_crc, INIT_CRC ); |
| + put32(cmdp->cpb, out_adler, INIT_ADLER); |
| + } |
| + print_dbg_info(s, __LINE__); |
| + } else { |
| + /* no dictionary here */ |
| + ASSERT( s->dict_len == 0 ); |
| + nx_history_len = (nx_history_len + NXQWSZ - 1) / NXQWSZ; |
| + putnn(cmdp->cpb, in_histlen, nx_history_len); |
| + /* convert to bytes */ |
| + nx_history_len = nx_history_len * NXQWSZ; |
| + |
| + if (nx_history_len > 0) { |
| + /* deflate history goes in first */ |
| + ASSERT(s->cur_out >= nx_history_len); |
| + nx_append_dde(s->ddl_in, |
| + s->fifo_out + (s->cur_out |
| + - nx_history_len), |
| + nx_history_len); |
| + } |
| + print_dbg_info(s, __LINE__); |
| + } |
| + } else { |
| + nx_history_len = s->history_len = 0; |
| + /* writing a 0 clears out subc as well */ |
| + cmdp->cpb.in_histlen = 0; |
| + |
| + /* initialize the crc values */ |
| + put32(cmdp->cpb, in_crc, INIT_CRC ); |
| + put32(cmdp->cpb, in_adler, INIT_ADLER); |
| + put32(cmdp->cpb, out_crc, INIT_CRC ); |
| + put32(cmdp->cpb, out_adler, INIT_ADLER); |
| + } |
| + |
| + /* We use the most recently measured compression ratio as a heuristic |
| + to estimate the input and output sizes. If we give too much input, |
| + the target buffer overflows and NX cycles are wasted, and then we |
| + must retry with smaller input size. 1000 is 100% */ |
| + s->last_comp_ratio = NX_MAX(NX_MIN(1000UL, s->last_comp_ratio), 100L); |
| + return nx_history_len; |
| +} |
| + |
| /** \brief Internal implementation of inflate. |
| * |
| * @param s nx_streamp to be processed. |
| @@ -1075,87 +1189,8 @@ static int nx_inflate_(nx_streamp s, int flush) |
| |
| /* NX decompresses input data */ |
| |
| - /* address/len lists */ |
| - clearp_dde(ddl_in); |
| - clearp_dde(ddl_out); |
| - |
| - nx_history_len = s->history_len; |
| - |
| - /* FC, CRC, HistLen, Table 6-6 */ |
| - if (s->resuming || (s->dict_len > 0)) { |
| - /* Resuming a partially decompressed input. The key |
| - to resume is supplying the max 32KB dictionary |
| - (history) to NX, which is basically the last 32KB |
| - or less of the output earlier produced. And also |
| - make sure partial checksums are carried forward |
| - */ |
| - fc = GZIP_FC_DECOMPRESS_RESUME; |
| - |
| - /* Crc of prev job passed to the job to be resumed */ |
| - put32(cmdp->cpb, in_crc, s->crc32); |
| - put32(cmdp->cpb, in_adler, s->adler32); |
| - |
| - /* Round up the sizes to quadword. Section 2.10 |
| - Rounding up will not segfault because |
| - nx_alloc_buffer has padding at the beginning */ |
| - |
| - if (s->dict_len > 0) { |
| - /* lays dict on top of hist */ |
| - nx_history_len = nx_amend_history_with_dict(s); |
| - |
| - if (s->wrap == HEADER_ZLIB) { |
| - /* in the raw mode pass crc as is; in the zlib mode |
| - initialize them */ |
| - put32(cmdp->cpb, in_crc, INIT_CRC ); |
| - put32(cmdp->cpb, in_adler, INIT_ADLER); |
| - put32(cmdp->cpb, out_crc, INIT_CRC ); |
| - put32(cmdp->cpb, out_adler, INIT_ADLER); |
| - } |
| - |
| - s->last_comp_ratio = NX_MAX( NX_MIN(1000UL, s->last_comp_ratio), 100L ); |
| - |
| - print_dbg_info(s, __LINE__); |
| - } |
| - else { |
| - /* no dictionary here */ |
| - ASSERT( s->dict_len == 0 ); |
| - nx_history_len = (nx_history_len + NXQWSZ - 1) / NXQWSZ; |
| - putnn(cmdp->cpb, in_histlen, nx_history_len); |
| - nx_history_len = nx_history_len * NXQWSZ; /* convert to bytes */ |
| - |
| - if (nx_history_len > 0) { |
| - /* deflate history goes in first */ |
| - ASSERT(s->cur_out >= nx_history_len); |
| - nx_append_dde(ddl_in, s->fifo_out + (s->cur_out - nx_history_len), nx_history_len); |
| - } |
| - print_dbg_info(s, __LINE__); |
| - } |
| - } |
| - else { |
| - /* First decompress job */ |
| - fc = GZIP_FC_DECOMPRESS; |
| - |
| - nx_history_len = s->history_len = 0; |
| - /* writing a 0 clears out subc as well */ |
| - cmdp->cpb.in_histlen = 0; |
| - |
| - /* initialize the crc values */ |
| - put32(cmdp->cpb, in_crc, INIT_CRC ); |
| - put32(cmdp->cpb, in_adler, INIT_ADLER); |
| - put32(cmdp->cpb, out_crc, INIT_CRC ); |
| - put32(cmdp->cpb, out_adler, INIT_ADLER); |
| - |
| - /* We use the most recently measured compression ratio |
| - as a heuristic to estimate the input and output |
| - sizes. If we give too much input, the target buffer |
| - overflows and NX cycles are wasted, and then we |
| - must retry with smaller input size. 1000 is 100% */ |
| - s->last_comp_ratio = 1000UL; |
| - } |
| - |
| - /* clear then copy fc to the crb */ |
| - cmdp->crb.gzip_fc = 0; |
| - putnn(cmdp->crb, gzip_fc, fc); |
| + fc = nx_reset_dde(s); |
| + nx_history_len = nx_init_dde(s); |
| |
| /* |
| * NX source buffers |
| |
| From e376d92fa704108f1258e3a41fc1ffcf551d1c5b Mon Sep 17 00:00:00 2001 |
| From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> |
| Date: Fri, 25 Mar 2022 09:57:32 -0300 |
| Subject: [PATCH 3/6] Fix the total amount of pages being touched |
| |
| Fix an error in nx_touch_pages_dde() that was causing the function to |
| touch a different number of pages than requested. |
| |
| Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> |
| |
| lib/nx_zlib.c | 2 +- |
| 1 file changed, 1 insertion(+), 1 deletion(-) |
| |
| diff --git a/lib/nx_zlib.c b/lib/nx_zlib.c |
| index a50d6f7..bf2a6fc 100644 |
| |
| |
| @@ -398,7 +398,7 @@ int nx_touch_pages_dde(nx_dde_t *ddep, long buf_sz, long page_sz, int wr) |
| |
| /* touching fewer pages than encoded in the ddebc */ |
| if ( total > buf_sz) { |
| - buf_len = NX_MIN(buf_len, total - buf_sz); |
| + buf_len = buf_sz - (total - buf_len); |
| nx_touch_pages((void *)buf_addr, buf_len, page_sz, wr); |
| prt_trace("touch loop break len 0x%x ddead %p\n", buf_len, (void *)buf_addr); |
| break; |
| |
| From 1f3dc128a476c9bbbb1b503d2fc8f54365101ebf Mon Sep 17 00:00:00 2001 |
| From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> |
| Date: Thu, 24 Mar 2022 18:18:34 -0300 |
| Subject: [PATCH 4/6] inflate: Move code that sets the DDE |
| |
| Create functions nx_set_dde_in() and nx_set_dde_out() based on old code. |
| |
| Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> |
| |
| lib/nx_inflate.c | 64 +++++++++++++++++++++++++++++++++--------------- |
| 1 file changed, 44 insertions(+), 20 deletions(-) |
| |
| diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c |
| index f1d9adb..a9671b2 100644 |
| |
| |
| @@ -1048,6 +1048,41 @@ static int nx_init_dde(nx_streamp s) { |
| return nx_history_len; |
| } |
| |
| +/** \brief Append input data to DDE |
| + * |
| + * @param s nx_streamp to be processed. |
| + * |
| + * @return The total amount of bytes appended to DDE |
| + */ |
| +static uint32_t nx_set_dde_in(nx_streamp s) { |
| + /* Buffered user input is next */ |
| + if (s->fifo_in != NULL) |
| + nx_append_dde(s->ddl_in, s->fifo_in + s->cur_in, s->used_in); |
| + /* Then current user input. */ |
| + nx_append_dde(s->ddl_in, s->next_in, s->avail_in); |
| + /* Total bytes going in to engine. */ |
| + return getp32(s->ddl_in, ddebc); |
| +} |
| + |
| +/** \brief Append output data to DDE |
| + * |
| + * @param s nx_streamp to be processed. |
| + * |
| + * @return The total amount of bytes appended to DDE |
| + */ |
| +static uint32_t nx_set_dde_out(nx_streamp s) { |
| + /* Decompress to user buffer first. */ |
| + nx_append_dde(s->ddl_out, s->next_out, s->avail_out); |
| + |
| + /* Overflow to fifo_out. |
| + used_out == 0 required by definition. */ |
| + ASSERT(s->used_out == 0); |
| + nx_append_dde(s->ddl_out, s->fifo_out + s->cur_out, |
| + s->len_out - s->cur_out); |
| + |
| + return s->avail_out + s->len_out - s->cur_out; |
| +} |
| + |
| /** \brief Internal implementation of inflate. |
| * |
| * @param s nx_streamp to be processed. |
| @@ -1195,28 +1230,13 @@ static int nx_inflate_(nx_streamp s, int flush) |
| /* |
| * NX source buffers |
| */ |
| - /* buffered user input is next */ |
| - if (s->fifo_in != NULL) |
| - nx_append_dde(ddl_in, s->fifo_in + s->cur_in, s->used_in); |
| - /* then current user input */ |
| - nx_append_dde(ddl_in, s->next_in, s->avail_in); |
| - source_sz = getp32(ddl_in, ddebc); /* total bytes going in to engine */ |
| - ASSERT( source_sz > nx_history_len ); |
| + source_sz = nx_set_dde_in(s); |
| + ASSERT(source_sz > nx_history_len); |
| |
| /* |
| * NX target buffers |
| */ |
| - ASSERT(s->used_out == 0); |
| - |
| - uint32_t len_next_out = s->avail_out; |
| - nx_append_dde(ddl_out, s->next_out, len_next_out); /* decomp in to user buffer */ |
| - |
| - /* overflow, used_out == 0 required by definition, +used_out below is unnecessary */ |
| - nx_append_dde(ddl_out, s->fifo_out + s->cur_out + s->used_out, s->len_out - s->cur_out - s->used_out); |
| - target_sz = len_next_out + s->len_out - s->cur_out - s->used_out; |
| - |
| - prt_info("len_next_out %d len_out %d cur_out %d used_out %d source_sz %d history_len %d\n", |
| - len_next_out, s->len_out, s->cur_out, s->used_out, source_sz, nx_history_len); |
| + target_sz = nx_set_dde_out(s); |
| |
| /* We want exactly the History size amount of 32KB to overflow |
| in to fifo_out. If overflow is less, the history spans |
| @@ -1228,6 +1248,7 @@ static int nx_inflate_(nx_streamp s, int flush) |
| these copies (memcpy) for performance. Therefore, the |
| heuristic here will estimate the source size for the |
| desired target size */ |
| + uint32_t len_next_out = s->avail_out; |
| |
| /* avail_out plus 32 KB history plus a bit of overhead */ |
| uint32_t target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2); |
| @@ -1240,11 +1261,14 @@ static int nx_inflate_(nx_streamp s, int flush) |
| |
| prt_info("target_sz_expected %d source_sz_expected %d source_sz %d last_comp_ratio %d nx_history_len %d\n", target_sz_expected, source_sz_expected, source_sz, s->last_comp_ratio, nx_history_len); |
| |
| + prt_info("%s:%d len_next_out %d len_out %d cur_out %d" |
| + " used_out %d source_sz %d history_len %d\n", |
| + __FUNCTION__, __LINE__, len_next_out, s->len_out, s->cur_out, |
| + s->used_out, source_sz, nx_history_len); |
| + |
| /* do not include input side history in the estimation */ |
| source_sz = source_sz - nx_history_len; |
| - |
| ASSERT(source_sz > 0); |
| - |
| source_sz = NX_MIN(source_sz, source_sz_expected); |
| |
| /* add the history back */ |
| |
| From eb6cb7b01fe1fa337979353e905e3ad96514b233 Mon Sep 17 00:00:00 2001 |
| From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> |
| Date: Thu, 24 Mar 2022 18:37:27 -0300 |
| Subject: [PATCH 5/6] inflate: cosmetic improvements |
| |
| - Add source code comments. |
| - Improve indentation. |
| - Break long lines. |
| - Fix error and information messages. |
| |
| Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> |
| |
| lib/nx_inflate.c | 51 ++++++++++++++++++++++++++++++++++++------------ |
| 1 file changed, 38 insertions(+), 13 deletions(-) |
| |
| diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c |
| index a9671b2..a6070bd 100644 |
| |
| |
| @@ -1090,14 +1090,31 @@ static uint32_t nx_set_dde_out(nx_streamp s) { |
| */ |
| static int nx_inflate_(nx_streamp s, int flush) |
| { |
| - /* queuing, file ops, byte counting */ |
| - uint32_t write_sz, source_sz, target_sz; |
| + /** \brief Sum of the bytes that may be used by NX as input |
| + * |
| + * Total amount of bytes sent to the NX to be used as input, |
| + * i.e. sum of the bytes in next_in and fifo_in. */ |
| + uint32_t source_sz; |
| + |
| + /** \brief Sum of the bytes that may be used by NX as output |
| + * |
| + * Maximum amount of bytes available by the NX to be used as output, |
| + * i.e. sum of the bytes available in next_out and fifo_out. */ |
| + uint32_t target_sz; |
| + |
| + uint32_t write_sz; |
| long loop_cnt = 0, loop_max = 0xffff; |
| |
| /** \brief inflate benefits from large jobs; memcopies must be |
| * amortized. */ |
| const uint32_t inflate_per_job_len = nx_config.per_job_len; |
| |
| + /** \brief Estimated value for target_sz. Used to calculate |
| + * source_sz_expected. */ |
| + uint32_t target_sz_expected; |
| + /** \brief Estimated value for source_sz. */ |
| + uint32_t source_sz_expected; |
| + |
| /* nx hardware */ |
| uint32_t sfbt = 0, subc = 0, spbc, tpbc, nx_ce, fc; |
| |
| @@ -1251,16 +1268,20 @@ static int nx_inflate_(nx_streamp s, int flush) |
| uint32_t len_next_out = s->avail_out; |
| |
| /* avail_out plus 32 KB history plus a bit of overhead */ |
| - uint32_t target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2); |
| + target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2); |
| |
| target_sz_expected = NX_MIN(target_sz_expected, inflate_per_job_len); |
| |
| /* e.g. if we want 100KB at the output and if the compression |
| ratio is 10% we want 10KB if input */ |
| - uint32_t source_sz_expected = (uint32_t)(((uint64_t)target_sz_expected * s->last_comp_ratio + 1000L)/1000UL); |
| + source_sz_expected = (uint32_t) (((uint64_t) target_sz_expected |
| + * s->last_comp_ratio + 1000L)/1000UL); |
| |
| - prt_info("target_sz_expected %d source_sz_expected %d source_sz %d last_comp_ratio %d nx_history_len %d\n", target_sz_expected, source_sz_expected, source_sz, s->last_comp_ratio, nx_history_len); |
| |
| + prt_info("%s:%d target_sz_expected %d source_sz_expected %d" |
| + " source_sz %d last_comp_ratio %d nx_history_len %d\n", |
| + __FUNCTION__, __LINE__, target_sz_expected, source_sz_expected, |
| + source_sz, s->last_comp_ratio, nx_history_len); |
| prt_info("%s:%d len_next_out %d len_out %d cur_out %d" |
| " used_out %d source_sz %d history_len %d\n", |
| __FUNCTION__, __LINE__, len_next_out, s->len_out, s->cur_out, |
| @@ -1280,8 +1301,11 @@ static int nx_inflate_(nx_streamp s, int flush) |
| |
| /* fault in pages */ |
| nx_touch_pages_dde(ddl_in, source_sz, nx_config.page_sz, 0); |
| - nx_touch_pages_dde(ddl_out, target_sz, nx_config.page_sz, 1); |
| - nx_touch_pages( (void *)cmdp, sizeof(nx_gzip_crb_cpb_t), nx_config.page_sz, 0); |
| + nx_touch_pages_dde(ddl_out, |
| + target_sz, |
| + nx_config.page_sz, 1); |
| + nx_touch_pages((void *) cmdp, sizeof(nx_gzip_crb_cpb_t), |
| + nx_config.page_sz, 0); |
| |
| /* |
| * send job to NX |
| @@ -1298,9 +1322,9 @@ static int nx_inflate_(nx_streamp s, int flush) |
| faulting address to fsaddr */ |
| print_dbg_info(s, __LINE__); |
| |
| - prt_warn("ERR_NX_AT_FAULT: crb.csb.fsaddr %p source_sz %d ", |
| - (void *)cmdp->crb.csb.fsaddr, source_sz); |
| - prt_warn("target_sz %d\n", target_sz); |
| + prt_warn("ERR_NX_AT_FAULT: crb.csb.fsaddr %p source_sz %d " |
| + "target_sz %d\n", (void *)cmdp->crb.csb.fsaddr, |
| + source_sz, target_sz); |
| #ifdef NX_LOG_SOURCE_TARGET |
| nx_print_dde(ddl_in, "source"); |
| nx_print_dde(ddl_out, "target"); |
| @@ -1339,8 +1363,8 @@ static int nx_inflate_(nx_streamp s, int flush) |
| if (ticks_total > (timeout_pgfaults * nx_get_freq())) { |
| /* TODO what to do when page faults are too many? |
| * Kernel MM would have killed the process. */ |
| - prt_err("Cannot make progress; too many page"); |
| - prt_err(" faults cc= %d\n", cc); |
| + prt_err("Cannot make progress; too many page" |
| + " faults cc= %d\n", cc); |
| } |
| else { |
| prt_warn("ERR_NX_AT_FAULT: more retry\n"); |
| @@ -1397,7 +1421,8 @@ static int nx_inflate_(nx_streamp s, int flush) |
| cover the max expansion of INF_MIN_INPUT_LEN |
| bytes */ |
| |
| - prt_info("ERR_NX_TARGET_SPACE; retry with smaller input data src %d hist %d\n", source_sz, nx_history_len); |
| + prt_info("ERR_NX_TARGET_SPACE; retry with smaller input data" |
| + " src %d hist %d\n", source_sz, nx_history_len); |
| goto restart_nx; |
| |
| case ERR_NX_OK: |
| |
| From 806bf8e3ed1d0ae8a21bc6b2035df390f1062c26 Mon Sep 17 00:00:00 2001 |
| From: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> |
| Date: Mon, 28 Mar 2022 18:15:37 -0300 |
| Subject: [PATCH 6/6] inflate: Limit the amount of data added to the DDE |
| |
| Stop adding all input and output data to the DDE and limit based on the |
| calculated value for source_sz_expected and target_sz_expected. |
| By limiting these values, we end up better estimating the amount of |
| pages that need to be touched, reducing the amount of time spent |
| touching pages that might not be used. |
| |
| Reported-by: Puvichakravarthy Ramachandran <puvichakravarthy@in.ibm.com> |
| Reported-by: Poorna Chandra Vemula <Poorna.Chandra.Vemula@ibm.com> |
| Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> |
| |
| lib/nx_inflate.c | 180 +++++++++++++++++++++++++++++------------------ |
| 1 file changed, 111 insertions(+), 69 deletions(-) |
| |
| diff --git a/lib/nx_inflate.c b/lib/nx_inflate.c |
| index a6070bd..b30cdf6 100644 |
| |
| |
| @@ -955,13 +955,6 @@ static int nx_reset_dde(nx_streamp s) { |
| } else { |
| /* First decompress job */ |
| fc = GZIP_FC_DECOMPRESS; |
| - |
| - /* We use the most recently measured compression ratio |
| - as a heuristic to estimate the input and output |
| - sizes. If we give too much input, the target buffer |
| - overflows and NX cycles are wasted, and then we |
| - must retry with smaller input size. 1000 is 100% */ |
| - s->last_comp_ratio = 1000UL; |
| } |
| |
| /* clear then copy fc to the crb */ |
| @@ -1051,15 +1044,24 @@ static int nx_init_dde(nx_streamp s) { |
| /** \brief Append input data to DDE |
| * |
| * @param s nx_streamp to be processed. |
| + * @param source_sz_expected The total amount of bytes expected as input. It |
| + * does not include dictionary or history. |
| * |
| * @return The total amount of bytes appended to DDE |
| */ |
| -static uint32_t nx_set_dde_in(nx_streamp s) { |
| +static uint32_t nx_set_dde_in(nx_streamp s, uint32_t source_sz_expected) { |
| + uint32_t tmp = 0; |
| + |
| /* Buffered user input is next */ |
| - if (s->fifo_in != NULL) |
| - nx_append_dde(s->ddl_in, s->fifo_in + s->cur_in, s->used_in); |
| - /* Then current user input. */ |
| - nx_append_dde(s->ddl_in, s->next_in, s->avail_in); |
| + if (s->fifo_in != NULL) { |
| + tmp = NX_MIN(s->used_in, source_sz_expected); |
| + nx_append_dde(s->ddl_in, s->fifo_in + s->cur_in, tmp); |
| + } |
| + if (tmp < source_sz_expected) { |
| + tmp = NX_MIN(s->avail_in, source_sz_expected - tmp); |
| + /* Then current user input. */ |
| + nx_append_dde(s->ddl_in, s->next_in, tmp); |
| + } |
| /* Total bytes going in to engine. */ |
| return getp32(s->ddl_in, ddebc); |
| } |
| @@ -1067,20 +1069,32 @@ static uint32_t nx_set_dde_in(nx_streamp s) { |
| /** \brief Append output data to DDE |
| * |
| * @param s nx_streamp to be processed. |
| + * @param target_sz_expected The total amount of bytes expected as output. |
| * |
| * @return The total amount of bytes appended to DDE |
| */ |
| -static uint32_t nx_set_dde_out(nx_streamp s) { |
| +static uint32_t nx_set_dde_out(nx_streamp s, uint32_t target_sz_expected) { |
| + uint32_t tmp; |
| + uint32_t ret; |
| + |
| + ret = NX_MIN(s->avail_out, target_sz_expected); |
| + |
| /* Decompress to user buffer first. */ |
| - nx_append_dde(s->ddl_out, s->next_out, s->avail_out); |
| + nx_append_dde(s->ddl_out, s->next_out, ret); |
| + |
| + if (ret < target_sz_expected) { |
| + tmp = NX_MIN(s->len_out - s->cur_out, |
| + target_sz_expected - ret); |
| + |
| + /* Overflow to fifo_out. |
| + used_out == 0 required by definition. */ |
| + ASSERT(s->used_out == 0); |
| + nx_append_dde(s->ddl_out, s->fifo_out + s->cur_out, tmp); |
| |
| - /* Overflow to fifo_out. |
| - used_out == 0 required by definition. */ |
| - ASSERT(s->used_out == 0); |
| - nx_append_dde(s->ddl_out, s->fifo_out + s->cur_out, |
| - s->len_out - s->cur_out); |
| + ret += tmp; |
| + } |
| |
| - return s->avail_out + s->len_out - s->cur_out; |
| + return ret; |
| } |
| |
| /** \brief Internal implementation of inflate. |
| @@ -1094,7 +1108,7 @@ static int nx_inflate_(nx_streamp s, int flush) |
| * |
| * Total amount of bytes sent to the NX to be used as input, |
| * i.e. sum of the bytes in next_in and fifo_in. */ |
| - uint32_t source_sz; |
| + uint32_t source_sz = 0; |
| |
| /** \brief Sum of the bytes that may be used by NX as output |
| * |
| @@ -1242,46 +1256,69 @@ static int nx_inflate_(nx_streamp s, int flush) |
| /* NX decompresses input data */ |
| |
| fc = nx_reset_dde(s); |
| - nx_history_len = nx_init_dde(s); |
| |
| - /* |
| - * NX source buffers |
| - */ |
| - source_sz = nx_set_dde_in(s); |
| - ASSERT(source_sz > nx_history_len); |
| + /** Estimate the amount of data sent to the NX. Ideally, we want |
| + * exactly the history size amount of 32 KiB to overflow in to fifo_out |
| + * in order to minimize copies of memory. |
| + * If overflow is less than 32 KiB, the history spans next_out and |
| + * fifo_out and must be copied in to fifo_out to setup history for the |
| + * next job. The fifo_out fraction is also copied back to user's |
| + * next_out before the next job. |
| + * If overflow is more, all the overflow must be copied back |
| + * to user's next_out before the next job. |
| + * If overflow is much more, we may get an ERR_NX_TARGET_SPACE, forcing |
| + * us to reduce the source before trying again. A retry in this case |
| + * will probably require NX to process much more than 32 KiB, which |
| + * requires more time than copying 32 KiB of data. |
| + * |
| + * With that said, we want to minimize unnecessary work (i.e. memcpy |
| + * and retrying NX jobs) for performance. Therefore, the heuristic |
| + * here will estimate the source size for the desired target size, but |
| + * it prioritizes avoiding ERR_NX_TARGET_SPACE. */ |
| |
| - /* |
| - * NX target buffers |
| - */ |
| - target_sz = nx_set_dde_out(s); |
| - |
| - /* We want exactly the History size amount of 32KB to overflow |
| - in to fifo_out. If overflow is less, the history spans |
| - next_out and fifo_out and must be copied in to fifo_out to |
| - setup history for the next job, and the fifo_out fraction is |
| - also copied back to user's next_out before the next job. |
| - If overflow is more, all the overflow must be copied back |
| - to user's next_out before the next job. We want to minimize |
| - these copies (memcpy) for performance. Therefore, the |
| - heuristic here will estimate the source size for the |
| - desired target size */ |
| uint32_t len_next_out = s->avail_out; |
| + s->last_comp_ratio = NX_MAX(NX_MIN(1000UL, s->last_comp_ratio), 100L); |
| |
| - /* avail_out plus 32 KB history plus a bit of overhead */ |
| + /* avail_out plus 32 KiB history plus a bit of overhead */ |
| target_sz_expected = len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2); |
| - |
| target_sz_expected = NX_MIN(target_sz_expected, inflate_per_job_len); |
| |
| - /* e.g. if we want 100KB at the output and if the compression |
| - ratio is 10% we want 10KB if input */ |
| + /** Calculate source_sz_expected based on target_sz_expected and the |
| + * last compression ratio, e.g. if we want 100KB at the output and if |
| + * the compression ratio is 10% we want 10KB of input */ |
| source_sz_expected = (uint32_t) (((uint64_t) target_sz_expected |
| * s->last_comp_ratio + 1000L)/1000UL); |
| |
| + /** After calculating source_sz_expected, try to provide extra |
| + * target_sz_expected in order to avoid an ERR_NX_TARGET_SPACE. */ |
| + target_sz_expected = NX_MIN(len_next_out + INF_HIS_LEN + (INF_HIS_LEN >> 2), |
| + 4 * inflate_per_job_len); |
| + prt_info("%s:%d target_sz_expected %d source_sz_expected %d" |
| + " source_sz %d last_comp_ratio %d\n", |
| + __FUNCTION__, __LINE__, target_sz_expected, source_sz_expected, |
| + source_sz, s->last_comp_ratio); |
| + |
| + |
| +init_dde: |
| + nx_history_len = nx_init_dde(s); |
| + |
| + /* |
| + * NX source buffers |
| + */ |
| + source_sz = nx_set_dde_in(s, source_sz_expected); |
| + ASSERT(source_sz > nx_history_len); |
| + ASSERT(source_sz <= source_sz_expected + nx_history_len); |
| |
| prt_info("%s:%d target_sz_expected %d source_sz_expected %d" |
| " source_sz %d last_comp_ratio %d nx_history_len %d\n", |
| __FUNCTION__, __LINE__, target_sz_expected, source_sz_expected, |
| source_sz, s->last_comp_ratio, nx_history_len); |
| + |
| + /* |
| + * NX target buffers |
| + */ |
| + target_sz = nx_set_dde_out(s, target_sz_expected); |
| + |
| prt_info("%s:%d len_next_out %d len_out %d cur_out %d" |
| " used_out %d source_sz %d history_len %d\n", |
| __FUNCTION__, __LINE__, len_next_out, s->len_out, s->cur_out, |
| @@ -1345,19 +1382,22 @@ static int nx_inflate_(nx_streamp s, int flush) |
| that is about 2 pages minimum for source and |
| and 6 pages for target; if the system does not |
| have 8 free pages then the loop will last forever */ |
| - source_sz = source_sz - nx_history_len; |
| - if (source_sz > (2 * INF_MIN_INPUT_LEN)) |
| - source_sz = (source_sz + 1) / 2; |
| - else if (source_sz > INF_MIN_INPUT_LEN) |
| - source_sz = INF_MIN_INPUT_LEN; |
| - |
| - /* else if caller gave fewer source bytes, keep it as is */ |
| - source_sz = source_sz + nx_history_len; |
| - |
| - if (target_sz > (2 * INF_MAX_EXPANSION_BYTES)) |
| - target_sz = (target_sz + 1) / 2; |
| - else if (target_sz > INF_MAX_EXPANSION_BYTES) |
| - target_sz = INF_MAX_EXPANSION_BYTES; |
| + source_sz_expected = source_sz - nx_history_len; |
| + if (source_sz_expected > (2 * INF_MIN_INPUT_LEN)) |
| + source_sz_expected |
| + = (source_sz_expected + 1) / 2; |
| + else if (source_sz_expected > INF_MIN_INPUT_LEN) |
| + source_sz_expected = INF_MIN_INPUT_LEN; |
| + |
| + /* else if caller gave fewer source bytes, keep it as |
| + is. */ |
| + source_sz = source_sz_expected + nx_history_len; |
| + |
| + if (target_sz_expected > (2 * INF_MAX_EXPANSION_BYTES)) |
| + target_sz_expected |
| + = (target_sz_expected + 1) / 2; |
| + else if (target_sz_expected > INF_MAX_EXPANSION_BYTES) |
| + target_sz_expected = INF_MAX_EXPANSION_BYTES; |
| |
| ticks_total = nx_wait_ticks(500, ticks_total, 0); |
| if (ticks_total > (timeout_pgfaults * nx_get_freq())) { |
| @@ -1368,7 +1408,8 @@ static int nx_inflate_(nx_streamp s, int flush) |
| } |
| else { |
| prt_warn("ERR_NX_AT_FAULT: more retry\n"); |
| - goto restart_nx; |
| + fc = nx_reset_dde(s); |
| + goto init_dde; |
| } |
| } |
| |
| @@ -1403,18 +1444,17 @@ static int nx_inflate_(nx_streamp s, int flush) |
| /* Target buffer not large enough; retry smaller input |
| data; give at least 1 byte. SPBC/TPBC are not valid */ |
| ASSERT( source_sz > nx_history_len ); |
| - source_sz = ((source_sz - nx_history_len + 1) / 2) + nx_history_len; |
| + source_sz_expected = (source_sz - nx_history_len + 1) / 2; |
| |
| - source_sz = source_sz - nx_history_len; |
| /* reduce large source down to minimum viable; if |
| source is already small don't change it */ |
| - if (source_sz > (2 * INF_MIN_INPUT_LEN)) |
| - source_sz = (source_sz + 1) / 2; |
| - else if (source_sz > INF_MIN_INPUT_LEN) |
| - source_sz = INF_MIN_INPUT_LEN; |
| + if (source_sz_expected > (2 * INF_MIN_INPUT_LEN)) |
| + source_sz_expected = (source_sz_expected + 1) / 2; |
| + else if (source_sz_expected > INF_MIN_INPUT_LEN) |
| + source_sz_expected = INF_MIN_INPUT_LEN; |
| |
| /* else if caller gave fewer source bytes, keep it as is */ |
| - source_sz = source_sz + nx_history_len; |
| + source_sz = source_sz_expected + nx_history_len; |
| |
| /* do not change target size because we allocated a |
| minimum of INF_MAX_EXPANSION_BYTES which should |
| @@ -1422,8 +1462,10 @@ static int nx_inflate_(nx_streamp s, int flush) |
| bytes */ |
| |
| prt_info("ERR_NX_TARGET_SPACE; retry with smaller input data" |
| - " src %d hist %d\n", source_sz, nx_history_len); |
| - goto restart_nx; |
| + " source_sz_expected %d nx_history_len %d\n", |
| + source_sz_expected, nx_history_len); |
| + fc = nx_reset_dde(s); |
| + goto init_dde; |
| |
| case ERR_NX_OK: |
| |