Blob Blame History Raw
From 44505cb397c46baa7dd4a0456f737f36e6d19ad0 Mon Sep 17 00:00:00 2001
From: Christian Persch <chpe@src.gnome.org>
Date: Tue, 1 Jan 2019 18:16:18 +0100
Subject: [PATCH] glib: Fix named destinations
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Named destinations may be described by bytestrings, containing
embedded NULs and not being NUL terminated. That means they cannot
be exposed directly as char*.

The alternatives are to escape the string from the internal representation
when exposing it in the API (e.g. in PopplerDest.named_dest), or to
add parallel API exposing it as GString, or GBytes. This patch chooses
the first option, since the presence of these named destinations in the
public, not sealed, PopplerDest struct means that the second option would
need more API additions. The chosen option is simpler, and does not
need the API users to adapt unless they create the named dest strings
themselves, or consume them in ways other than calling poppler APIs.

The escaping scheme chosen simply replaces embedded NUL with "\0" and
escapes a literal backslash with "\\".  This is a minimal ABI change in
that some strings that previously worked unchanged as destinations
(those containing backslash) now don't work, but on the other hand,
previously it was impossible to use any destinations containing embedded
NULs.

Add poppler_named_dest_{from,to}_bytestring() to perform that
conversion, and clarify the documentation for when you need them.

Based on a patch by José Aliste <jaliste@src.gnome.org>.

https://gitlab.freedesktop.org/poppler/poppler/issues/631
---
 glib/demo/utils.c                   |   2 -
 glib/poppler-action.cc              |   5 +-
 glib/poppler-action.h               |  16 +++
 glib/poppler-document.cc            | 151 ++++++++++++++++++++++++----
 glib/reference/poppler-sections.txt |   2 +
 5 files changed, 154 insertions(+), 22 deletions(-)

diff --git a/glib/demo/utils.c b/glib/demo/utils.c
index 6bf61614..38bde147 100644
--- a/glib/demo/utils.c
+++ b/glib/demo/utils.c
@@ -151,8 +151,6 @@ pgd_action_view_add_destination (GtkWidget   *action_view,
 		pgd_table_add_property (table, "<b>Zoom:</b>", str, row);
 		g_free (str);
 	} else {
-		pgd_table_add_property (table, "<b>Named Dest:</b>", dest->named_dest, row);
-
 		if (document && !remote) {
 			PopplerDest *new_dest;
 
diff --git a/glib/poppler-action.cc b/glib/poppler-action.cc
index 9af67571..7e0bc031 100644
--- a/glib/poppler-action.cc
+++ b/glib/poppler-action.cc
@@ -328,7 +328,8 @@ dest_new_named (const GooString *named_dest)
 	}
 
 	dest->type = POPPLER_DEST_NAMED;
-	dest->named_dest = g_strdup (named_dest->getCString ());
+	dest->named_dest = poppler_named_dest_from_bytestring((const guint8*)named_dest->getCString (),
+							      named_dest->getLength ());
 
 	return dest;
 }
diff --git a/glib/poppler-action.h b/glib/poppler-action.h
index 13468f79..93a026be 100644
--- a/glib/poppler-action.h
+++ b/glib/poppler-action.h
@@ -164,6 +164,14 @@ typedef struct _PopplerActionJavascript PopplerActionJavascript;
  * @change_zoom: whether scale factor should be changed
  *
  * Data structure for holding a destination
+ *
+ * Note that @named_dest is the string representation of the named
+ * destination. This is the right form to pass to poppler functions,
+ * e.g. poppler_document_find_dest(), but to get the destination as
+ * it appears in the PDF itself, you need to convert it to a bytestring
+ * with poppler_named_dest_to_bytestring() first.
+ * Also note that @named_dest does not have a defined encoding and
+ * is not in a form suitable to be displayed to the user.
  */
 struct _PopplerDest
 {
@@ -317,6 +325,12 @@ void           poppler_dest_free       (PopplerDest   *dest);
 void           poppler_dest_free       (PopplerDest   *dest);
 PopplerDest   *poppler_dest_copy       (PopplerDest   *dest);
 
+char   *poppler_named_dest_from_bytestring (const guint8 *data,
+                                            gsize         length);
+
+guint8 *poppler_named_dest_to_bytestring   (const char   *named_dest,
+                                            gsize        *length);
+
 G_END_DECLS
 
 #endif /* __POPPLER_GLIB_H__ */
diff --git a/glib/poppler-document.cc b/glib/poppler-document.cc
index a9b4103d..d97d1448 100644
--- a/glib/poppler-document.cc
+++ b/glib/poppler-document.cc
@@ -680,41 +680,154 @@ poppler_document_get_attachments (PopplerDocument *document)
   return g_list_reverse (retval);
 }
 
+/**
+ * poppler_named_dest_from_bytestring:
+ * @data: (array length=length): the bytestring data; may be %NULL only if @length is 0
+ * @length: the bytestring length
+ *
+ * Converts a bytestring into a zero-terminated string suitable to
+ * pass to poppler_document_find_dest(). NUL bytes are written as backslash-zero and backslashes are doubled.
+ *
+ * Note that the returned string has no defined encoding and is not
+ * suitable for display to the user.
+ *
+ * The returned data must be freed using g_free().
+ *
+ * Returns: (transfer full): the named dest
+ *
+ * Since: 0.73
+ */
+char *
+poppler_named_dest_from_bytestring (const guint8 *data,
+				    gsize         length)
+{
+  const guint8 *p, *pend;
+  char *dest, *q;
+
+  g_return_val_if_fail (length == 0 || data != NULL, NULL); /* NULL data is only acceptable when there is nothing to read */
+  /* Each source byte needs maximally 2 destination chars (\\ or \0), plus 1 for the terminator */
+  q = dest = (gchar *)g_malloc (length * 2 + 1);
+
+  pend = data + length;
+  for (p = data; p < pend; ++p) {
+    switch (*p) {
+    case '\0': /* embedded NUL -> backslash, '0' */
+      *q++ = '\\';
+      *q++ = '0';
+      break;
+    case '\\': /* literal backslash -> two backslashes */
+      *q++ = '\\';
+      *q++ = '\\';
+      break;
+    default: /* every other byte is copied unchanged */
+      *q++ = *p;
+      break;
+    }
+  }
+
+  *q = 0; /* zero terminate */
+  return dest;
+}
+
+/**
+ * poppler_named_dest_to_bytestring:
+ * @name: the named dest string
+ * @length: (out): a location to store the length of the returned bytestring
+ *
+ * Converts a named dest string (e.g. from #PopplerDest.named_dest) into a
+ * bytestring, inverting the transformation of
+ * poppler_named_dest_from_bytestring().
+ *
+ * Note that the returned data is not zero terminated and may also
+ * contain embedded NUL bytes.
+ *
+ * If @name is not a valid named dest string, returns %NULL and sets @length to 0.
+ *
+ * The returned data must be freed using g_free().
+ *
+ * Returns: (array length=length) (transfer full) (nullable): a new bytestring,
+ *   or %NULL
+ *
+ * Since: 0.73
+ */
+guint8 *
+poppler_named_dest_to_bytestring (const char *name,
+				  gsize      *length)
+{
+  const char *p;
+  guint8 *data, *q;
+  gsize len;
+
+  g_return_val_if_fail (name != NULL, NULL);
+  g_return_val_if_fail (length != NULL, NULL);
+
+  len = strlen (name); /* upper bound; reduced by one for every escape pair below */
+  q = data = (guint8*) g_malloc (len + 1); /* +1: g_malloc(0) returns NULL, which would make an empty dest look invalid */
+  for (p = name; *p; ++p) {
+    if (*p == '\\') {
+      p++; /* look at the character following the backslash */
+      len--; /* two input chars collapse into one output byte */
+      if (*p == '0')
+	*q++ = '\0';
+      else if (*p == '\\')
+	*q++ = '\\';
+      else
+	goto invalid; /* unknown escape, or a backslash at the very end of the string */
+    } else {
+      *q++ = *p;
+    }
+  }
+
+  *length = len;
+  return data;
+
+invalid:
+  g_free(data);
+  *length = 0;
+  return NULL;
+}
+
 /**
  * poppler_document_find_dest:
  * @document: A #PopplerDocument
  * @link_name: a named destination
  *
- * Finds named destination @link_name in @document
+ * Creates a #PopplerDest for the named destination @link_name in @document.
+ *
+ * Note that named destinations are bytestrings, not strings. That means that
+ * unless @link_name was returned by a poppler function (e.g. is
+ * #PopplerDest.named_dest), it needs to be converted to a string
+ * using poppler_named_dest_from_bytestring() before being passed to this
+ * function.
  *
- * Return value: The #PopplerDest destination or %NULL if
- * @link_name is not a destination. Returned value must
- * be freed with #poppler_dest_free
+ * The returned value must be freed with poppler_dest_free().
+ *
+ * Return value: (transfer full): a new #PopplerDest destination, or %NULL if
+ *   @link_name cannot be converted to a bytestring or is not a destination.
  **/
 PopplerDest *
 poppler_document_find_dest (PopplerDocument *document,
 			    const gchar     *link_name)
 {
-	PopplerDest *dest = NULL;
-	LinkDest *link_dest = NULL;
-	GooString *g_link_name;
+  g_return_val_if_fail (POPPLER_IS_DOCUMENT (document), NULL);
+  g_return_val_if_fail (link_name != NULL, NULL);
 
-	g_return_val_if_fail (POPPLER_IS_DOCUMENT (document), NULL);
-	g_return_val_if_fail (link_name != NULL, NULL);
+  gsize len;
+  guint8* data = poppler_named_dest_to_bytestring (link_name, &len); /* undo the string escaping to recover the raw bytes */
+  if (data == NULL) /* no bytestring could be produced from link_name */
+    return NULL;
 
-	g_link_name = new GooString (link_name);
+  GooString g_link_name ((const char*)data, (int)len); /* explicit length: the bytes may contain embedded NULs */
+  g_free (data); /* data is not referenced after this point */
 
-	if (g_link_name) {
-		link_dest = document->doc->findDest (g_link_name);
-		delete g_link_name;
-	}
+  LinkDest *link_dest = document->doc->findDest (&g_link_name);
+  if (link_dest == NULL) /* the lookup produced no destination */
+    return NULL;
 
-	if (link_dest) {
-		dest = _poppler_dest_new_goto (document, link_dest);
-		delete link_dest;
-	}
+  PopplerDest *dest = _poppler_dest_new_goto (document, link_dest);
+  delete link_dest; /* link_dest was only needed to build dest */
 
-	return dest;
+  return dest;
 }
 
 char *_poppler_goo_string_to_utf8(GooString *s)
diff --git a/glib/reference/poppler-sections.txt b/glib/reference/poppler-sections.txt
index 6c15f773..39985553 100644
--- a/glib/reference/poppler-sections.txt
+++ b/glib/reference/poppler-sections.txt
@@ -735,6 +735,8 @@ poppler_text_span_get_type
 poppler_get_backend
 poppler_get_version
 poppler_date_parse
+poppler_named_dest_from_bytestring
+poppler_named_dest_to_bytestring
 poppler_color_new
 poppler_color_copy
 poppler_color_free
-- 
2.26.2