cee514
Backport of upstream commit db7d62c8d5:
cee514
cee514
Avoid attempting to mmap memory from an offset that is not a multiple of
cee514
the system page size on systems with page sizes larger than 32KB.
cee514
cee514
https://www.sqlite.org/src/info/db7d62c8d58eb1e8654a762c9b199ae4e2759038
cee514
cee514
Index: src/os_unix.c
cee514
==================================================================
cee514
--- src/os_unix.c
cee514
+++ src/os_unix.c
cee514
@@ -321,10 +321,11 @@
cee514
   return geteuid() ? 0 : fchown(fd,uid,gid);
cee514
 }
cee514
 
cee514
 /* Forward reference */
cee514
 static int openDirectory(const char*, int*);
cee514
+static int unixGetpagesize(void);
cee514
 
cee514
 /*
cee514
 ** Many system calls are accessed through pointer-to-functions so that
cee514
 ** they may be overridden at runtime to facilitate fault injection during
cee514
 ** testing and sandboxing.  The following array holds the names and pointers
cee514
@@ -443,10 +444,13 @@
cee514
   { "mremap",       (sqlite3_syscall_ptr)mremap,          0 },
cee514
 #else
cee514
   { "mremap",       (sqlite3_syscall_ptr)0,               0 },
cee514
 #endif
cee514
 #define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent)
cee514
+
cee514
+  { "getpagesize",  (sqlite3_syscall_ptr)unixGetpagesize, 0 },
cee514
+#define osGetpagesize ((int(*)(void))aSyscall[24].pCurrent)
cee514
 
cee514
 }; /* End of the overrideable system calls */
cee514
 
cee514
 /*
cee514
 ** This is the xSetSystemCall() method of sqlite3_vfs for all of the
cee514
@@ -4103,10 +4107,40 @@
cee514
 #endif
cee514
 
cee514
   return rc;        
cee514
 }
cee514
 
cee514
+/*
cee514
+** Return the system page size.
cee514
+**
cee514
+** This function should not be called directly by other code in this file. 
cee514
+** Instead, it should be called via macro osGetpagesize().
cee514
+*/
cee514
+static int unixGetpagesize(void){
cee514
+#if defined(_BSD_SOURCE)
cee514
+  return getpagesize();
cee514
+#else
cee514
+  return (int)sysconf(_SC_PAGESIZE);
cee514
+#endif
cee514
+}
cee514
+
cee514
+/*
cee514
+** Return the minimum number of 32KB shm regions that should be mapped at
cee514
+** a time, assuming that each mapping must be an integer multiple of the
cee514
+** current system page-size.
cee514
+**
cee514
+** Usually, this is 1. The exception seems to be systems that are configured
cee514
+** to use 64KB pages - in this case each mapping must cover at least two
cee514
+** shm regions.
cee514
+*/
cee514
+static int unixShmRegionPerMap(void){
cee514
+  int shmsz = 32*1024;            /* SHM region size */
cee514
+  int pgsz = osGetpagesize();   /* System page size */
cee514
+  assert( ((pgsz-1)&pgsz)==0 );   /* Page size must be a power of 2 */
cee514
+  if( pgsz<shmsz ) return 1;
cee514
+  return pgsz/shmsz;
cee514
+}
cee514
 
cee514
 /*
cee514
 ** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0.
cee514
 **
cee514
 ** This is not a VFS shared-memory method; it is a utility function called
cee514
@@ -4114,14 +4148,15 @@
cee514
 */
cee514
 static void unixShmPurge(unixFile *pFd){
cee514
   unixShmNode *p = pFd->pInode->pShmNode;
cee514
   assert( unixMutexHeld() );
cee514
   if( p && p->nRef==0 ){
cee514
+    int nShmPerMap = unixShmRegionPerMap();
cee514
     int i;
cee514
     assert( p->pInode==pFd->pInode );
cee514
     sqlite3_mutex_free(p->mutex);
cee514
-    for(i=0; i<p->nRegion; i++){
cee514
+    for(i=0; i<p->nRegion; i+=nShmPerMap){
cee514
       if( p->h>=0 ){
cee514
         osMunmap(p->apRegion[i], p->szRegion);
cee514
       }else{
cee514
         sqlite3_free(p->apRegion[i]);
cee514
       }
cee514
@@ -4324,10 +4359,12 @@
cee514
 ){
cee514
   unixFile *pDbFd = (unixFile*)fd;
cee514
   unixShm *p;
cee514
   unixShmNode *pShmNode;
cee514
   int rc = SQLITE_OK;
cee514
+  int nShmPerMap = unixShmRegionPerMap();
cee514
+  int nReqRegion;
cee514
 
cee514
   /* If the shared-memory file has not yet been opened, open it now. */
cee514
   if( pDbFd->pShm==0 ){
cee514
     rc = unixOpenSharedMemory(pDbFd);
cee514
     if( rc!=SQLITE_OK ) return rc;
cee514
@@ -4339,13 +4376,16 @@
cee514
   assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 );
cee514
   assert( pShmNode->pInode==pDbFd->pInode );
cee514
   assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 );
cee514
   assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 );
cee514
 
cee514
-  if( pShmNode->nRegion<=iRegion ){
cee514
+  /* Minimum number of regions required to be mapped. */
cee514
+  nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap;
cee514
+
cee514
+  if( pShmNode->nRegion<nReqRegion ){
cee514
     char **apNew;                      /* New apRegion[] array */
cee514
-    int nByte = (iRegion+1)*szRegion;  /* Minimum required file size */
cee514
+    int nByte = nReqRegion*szRegion;   /* Minimum required file size */
cee514
     struct stat sStat;                 /* Used by fstat() */
cee514
 
cee514
     pShmNode->szRegion = szRegion;
cee514
 
cee514
     if( pShmNode->h>=0 ){
cee514
@@ -4390,21 +4430,23 @@
cee514
       }
cee514
     }
cee514
 
cee514
     /* Map the requested memory region into this processes address space. */
cee514
     apNew = (char **)sqlite3_realloc(
cee514
-        pShmNode->apRegion, (iRegion+1)*sizeof(char *)
cee514
+        pShmNode->apRegion, nReqRegion*sizeof(char *)
cee514
     );
cee514
     if( !apNew ){
cee514
       rc = SQLITE_IOERR_NOMEM;
cee514
       goto shmpage_out;
cee514
     }
cee514
     pShmNode->apRegion = apNew;
cee514
-    while(pShmNode->nRegion<=iRegion){
cee514
+    while( pShmNode->nRegion<nReqRegion ){
cee514
+      int nMap = szRegion*nShmPerMap;
cee514
+      int i;
cee514
       void *pMem;
cee514
       if( pShmNode->h>=0 ){
cee514
-        pMem = osMmap(0, szRegion,
cee514
+        pMem = osMmap(0, nMap,
cee514
             pShmNode->isReadonly ? PROT_READ : PROT_READ|PROT_WRITE, 
cee514
             MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion
cee514
         );
cee514
         if( pMem==MAP_FAILED ){
cee514
           rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename);
cee514
@@ -4416,12 +4458,15 @@
cee514
           rc = SQLITE_NOMEM;
cee514
           goto shmpage_out;
cee514
         }
cee514
         memset(pMem, 0, szRegion);
cee514
       }
cee514
-      pShmNode->apRegion[pShmNode->nRegion] = pMem;
cee514
-      pShmNode->nRegion++;
cee514
+
cee514
+      for(i=0; i<nShmPerMap; i++){
cee514
+        pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i];
cee514
+      }
cee514
+      pShmNode->nRegion += nShmPerMap;
cee514
     }
cee514
   }
cee514
 
cee514
 shmpage_out:
cee514
   if( pShmNode->nRegion>iRegion ){
cee514
@@ -4631,25 +4676,10 @@
cee514
 #endif
cee514
 }
cee514
 
cee514
 #if SQLITE_MAX_MMAP_SIZE>0
cee514
 /*
cee514
-** Return the system page size.
cee514
-*/
cee514
-static int unixGetPagesize(void){
cee514
-#if HAVE_MREMAP
cee514
-  return 512;
cee514
-#elif defined(_BSD_SOURCE)
cee514
-  return getpagesize();
cee514
-#else
cee514
-  return (int)sysconf(_SC_PAGESIZE);
cee514
-#endif
cee514
-}
cee514
-#endif /* SQLITE_MAX_MMAP_SIZE>0 */
cee514
-
cee514
-#if SQLITE_MAX_MMAP_SIZE>0
cee514
-/*
cee514
 ** Attempt to set the size of the memory mapping maintained by file 
cee514
 ** descriptor pFd to nNew bytes. Any existing mapping is discarded.
cee514
 **
cee514
 ** If successful, this function sets the following variables:
cee514
 **
cee514
@@ -4680,12 +4712,16 @@
cee514
   assert( MAP_FAILED!=0 );
cee514
 
cee514
   if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE;
cee514
 
cee514
   if( pOrig ){
cee514
-    const int szSyspage = unixGetPagesize();
cee514
+#if HAVE_MREMAP
cee514
+    i64 nReuse = pFd->mmapSize;
cee514
+#else
cee514
+    const int szSyspage = osGetpagesize();
cee514
     i64 nReuse = (pFd->mmapSize & ~(szSyspage-1));
cee514
+#endif
cee514
     u8 *pReq = &pOrig[nReuse];
cee514
 
cee514
     /* Unmap any pages of the existing mapping that cannot be reused. */
cee514
     if( nReuse!=nOrig ){
cee514
       osMunmap(pReq, nOrig-nReuse);
cee514
@@ -7427,11 +7463,11 @@
cee514
   };
cee514
   unsigned int i;          /* Loop counter */
cee514
 
cee514
   /* Double-check that the aSyscall[] array has been constructed
cee514
   ** correctly.  See ticket [bb3a86e890c8e96ab] */
cee514
-  assert( ArraySize(aSyscall)==24 );
cee514
+  assert( ArraySize(aSyscall)==25 );
cee514
 
cee514
   /* Register all VFSes defined in the aVfs[] array */
cee514
   for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
cee514
     sqlite3_vfs_register(&aVfs[i], i==0);
cee514
   }
cee514
cee514
Index: src/test_syscall.c
cee514
==================================================================
cee514
--- src/test_syscall.c
cee514
+++ src/test_syscall.c
cee514
@@ -65,10 +65,15 @@
cee514
 **     Return true if the named system call exists. Or false otherwise.
cee514
 **
cee514
 **   test_syscall list
cee514
 **     Return a list of all system calls. The list is constructed using
cee514
 **     the xNextSystemCall() VFS method.
cee514
+**
cee514
+**   test_syscall pagesize PGSZ
cee514
+**     If PGSZ is a power of two greater than 256, install a wrapper around
cee514
+**     OS function getpagesize() that reports the system page size as PGSZ.
cee514
+**     Or, if PGSZ is less than zero, remove any wrapper already installed.
cee514
 */
cee514
 
cee514
 #include "sqliteInt.h"
cee514
 #include "sqlite3.h"
cee514
 #include "tcl.h"
cee514
@@ -87,11 +92,13 @@
cee514
 
cee514
 static struct TestSyscallGlobal {
cee514
   int bPersist;                   /* 1 for persistent errors, 0 for transient */
cee514
   int nCount;                     /* Fail after this many more calls */
cee514
   int nFail;                      /* Number of failures that have occurred */
cee514
-} gSyscall = { 0, 0 };
cee514
+  int pgsz;
cee514
+  sqlite3_syscall_ptr orig_getpagesize;
cee514
+} gSyscall = { 0, 0, 0, 0, 0 };
cee514
 
cee514
 static int ts_open(const char *, int, int);
cee514
 static int ts_close(int fd);
cee514
 static int ts_access(const char *zPath, int mode);
cee514
 static char *ts_getcwd(char *zPath, size_t nPath);
cee514
@@ -647,10 +654,49 @@
cee514
 
cee514
   pVfs = sqlite3_vfs_find(0);
cee514
   Tcl_SetObjResult(interp, Tcl_NewStringObj(pVfs->zName, -1));
cee514
   return TCL_OK;
cee514
 }
cee514
+
cee514
+static int ts_getpagesize(void){
cee514
+  return gSyscall.pgsz;
cee514
+}
cee514
+
cee514
+static int test_syscall_pagesize(
cee514
+  void * clientData,
cee514
+  Tcl_Interp *interp,
cee514
+  int objc,
cee514
+  Tcl_Obj *CONST objv[]
cee514
+){
cee514
+  sqlite3_vfs *pVfs = sqlite3_vfs_find(0);
cee514
+  int pgsz;
cee514
+  if( objc!=3 ){
cee514
+    Tcl_WrongNumArgs(interp, 2, objv, "PGSZ");
cee514
+    return TCL_ERROR;
cee514
+  }
cee514
+  if( Tcl_GetIntFromObj(interp, objv[2], &pgsz) ){
cee514
+    return TCL_ERROR;
cee514
+  }
cee514
+
cee514
+  if( pgsz<0 ){
cee514
+    if( gSyscall.orig_getpagesize ){
cee514
+      pVfs->xSetSystemCall(pVfs, "getpagesize", gSyscall.orig_getpagesize);
cee514
+    }
cee514
+  }else{
cee514
+    if( pgsz<512 || (pgsz & (pgsz-1)) ){
cee514
+      Tcl_AppendResult(interp, "pgsz out of range", 0);
cee514
+      return TCL_ERROR;
cee514
+    }
cee514
+    gSyscall.orig_getpagesize = pVfs->xGetSystemCall(pVfs, "getpagesize");
cee514
+    gSyscall.pgsz = pgsz;
cee514
+    pVfs->xSetSystemCall(
cee514
+        pVfs, "getpagesize", (sqlite3_syscall_ptr)ts_getpagesize
cee514
+    );
cee514
+  }
cee514
+
cee514
+  return TCL_OK;
cee514
+}
cee514
 
cee514
 static int test_syscall(
cee514
   void * clientData,
cee514
   Tcl_Interp *interp,
cee514
   int objc,
cee514
@@ -666,10 +712,11 @@
cee514
     { "reset",      test_syscall_reset },
cee514
     { "errno",      test_syscall_errno },
cee514
     { "exists",     test_syscall_exists },
cee514
     { "list",       test_syscall_list },
cee514
     { "defaultvfs", test_syscall_defaultvfs },
cee514
+    { "pagesize",   test_syscall_pagesize },
cee514
     { 0, 0 }
cee514
   };
cee514
   int iCmd;
cee514
   int rc;
cee514
 
cee514
cee514
Index: test/syscall.test
cee514
==================================================================
cee514
--- test/syscall.test
cee514
+++ test/syscall.test
cee514
@@ -59,10 +59,11 @@
cee514
 foreach s {
cee514
     open close access getcwd stat fstat ftruncate
cee514
     fcntl read pread write pwrite fchmod fallocate
cee514
     pread64 pwrite64 unlink openDirectory mkdir rmdir 
cee514
     statvfs fchown umask mmap munmap mremap
cee514
+    getpagesize
cee514
 } {
cee514
   if {[test_syscall exists $s]} {lappend syscall_list $s}
cee514
 }
cee514
 do_test 3.1 { lsort [test_syscall list] } [lsort $syscall_list]
cee514
 
cee514
cee514
ADDED   test/wal64k.test
cee514
Index: test/wal64k.test
cee514
==================================================================
cee514
--- test/wal64k.test
cee514
+++ test/wal64k.test
cee514
@@ -0,0 +1,47 @@
cee514
+# 2010 April 13
cee514
+#
cee514
+# The author disclaims copyright to this source code.  In place of
cee514
+# a legal notice, here is a blessing:
cee514
+#
cee514
+#    May you do good and not evil.
cee514
+#    May you find forgiveness for yourself and forgive others.
cee514
+#    May you share freely, never taking more than you give.
cee514
+#
cee514
+#***********************************************************************
cee514
+# This file implements regression tests for SQLite library.  The
cee514
+# focus of this file is testing the operation of the library in
cee514
+# "PRAGMA journal_mode=WAL" mode.
cee514
+#
cee514
+
cee514
+set testdir [file dirname $argv0]
cee514
+source $testdir/tester.tcl
cee514
+set testprefix wal64k
cee514
+
cee514
+ifcapable !wal {finish_test ; return }
cee514
+
cee514
+db close
cee514
+test_syscall pagesize 65536
cee514
+sqlite3 db test.db
cee514
+
cee514
+do_execsql_test 1.0 { 
cee514
+  PRAGMA journal_mode = WAL;
cee514
+  CREATE TABLE t1(x);
cee514
+  CREATE INDEX i1 ON t1(x);
cee514
+} {wal}
cee514
+do_test 1.1 { file size test.db-shm } {65536}
cee514
+
cee514
+do_test 1.2 {
cee514
+  execsql BEGIN
cee514
+  while {[file size test.db-shm]==65536} {
cee514
+    execsql { INSERT INTO t1 VALUES( randstr(900,1100) ) }
cee514
+  }
cee514
+  execsql COMMIT
cee514
+  file size test.db-shm
cee514
+} {131072}
cee514
+
cee514
+integrity_check 1.3
cee514
+
cee514
+db close
cee514
+test_syscall pagesize -1
cee514
+finish_test
cee514
+
cee514