Backport of upstream commit db7d62c8d5: Avoid attempting to mmap memory from an offset that is not a multiple of the system page size on systems with page sizes larger than 32KB. https://www.sqlite.org/src/info/db7d62c8d58eb1e8654a762c9b199ae4e2759038 Index: src/os_unix.c ================================================================== --- src/os_unix.c +++ src/os_unix.c @@ -321,10 +321,11 @@ return geteuid() ? 0 : fchown(fd,uid,gid); } /* Forward reference */ static int openDirectory(const char*, int*); +static int unixGetpagesize(void); /* ** Many system calls are accessed through pointer-to-functions so that ** they may be overridden at runtime to facilitate fault injection during ** testing and sandboxing. The following array holds the names and pointers @@ -443,10 +444,13 @@ { "mremap", (sqlite3_syscall_ptr)mremap, 0 }, #else { "mremap", (sqlite3_syscall_ptr)0, 0 }, #endif #define osMremap ((void*(*)(void*,size_t,size_t,int,...))aSyscall[23].pCurrent) + + { "getpagesize", (sqlite3_syscall_ptr)unixGetpagesize, 0 }, +#define osGetpagesize ((int(*)(void))aSyscall[24].pCurrent) }; /* End of the overrideable system calls */ /* ** This is the xSetSystemCall() method of sqlite3_vfs for all of the @@ -4103,10 +4107,40 @@ #endif return rc; } +/* +** Return the system page size. +** +** This function should not be called directly by other code in this file. +** Instead, it should be called via macro osGetpagesize(). +*/ +static int unixGetpagesize(void){ +#if defined(_BSD_SOURCE) + return getpagesize(); +#else + return (int)sysconf(_SC_PAGESIZE); +#endif +} + +/* +** Return the minimum number of 32KB shm regions that should be mapped at +** a time, assuming that each mapping must be an integer multiple of the +** current system page-size. +** +** Usually, this is 1. The exception seems to be systems that are configured +** to use 64KB pages - in this case each mapping must cover at least two +** shm regions. 
+*/ +static int unixShmRegionPerMap(void){ + int shmsz = 32*1024; /* SHM region size */ + int pgsz = osGetpagesize(); /* System page size */ + assert( ((pgsz-1)&pgsz)==0 ); /* Page size must be a power of 2 */ + if( pgsz<shmsz ) return 1; + return pgsz/shmsz; +} /* ** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0. ** ** This is not a VFS shared-memory method; it is a utility function called @@ -4114,14 +4148,15 @@ */ static void unixShmPurge(unixFile *pFd){ unixShmNode *p = pFd->pInode->pShmNode; assert( unixMutexHeld() ); if( p && p->nRef==0 ){ + int nShmPerMap = unixShmRegionPerMap(); int i; assert( p->pInode==pFd->pInode ); sqlite3_mutex_free(p->mutex); - for(i=0; i<p->nRegion; i++){ + for(i=0; i<p->nRegion; i+=nShmPerMap){ if( p->h>=0 ){ osMunmap(p->apRegion[i], p->szRegion); }else{ sqlite3_free(p->apRegion[i]); } @@ -4324,10 +4359,12 @@ ){ unixFile *pDbFd = (unixFile*)fd; unixShm *p; unixShmNode *pShmNode; int rc = SQLITE_OK; + int nShmPerMap = unixShmRegionPerMap(); + int nReqRegion; /* If the shared-memory file has not yet been opened, open it now. */ if( pDbFd->pShm==0 ){ rc = unixOpenSharedMemory(pDbFd); if( rc!=SQLITE_OK ) return rc; @@ -4339,13 +4376,16 @@ assert( szRegion==pShmNode->szRegion || pShmNode->nRegion==0 ); assert( pShmNode->pInode==pDbFd->pInode ); assert( pShmNode->h>=0 || pDbFd->pInode->bProcessLock==1 ); assert( pShmNode->h<0 || pDbFd->pInode->bProcessLock==0 ); - if( pShmNode->nRegion<=iRegion ){ + /* Minimum number of regions required to be mapped. */ + nReqRegion = ((iRegion+nShmPerMap) / nShmPerMap) * nShmPerMap; + + if( pShmNode->nRegion<nReqRegion ){ char **apNew; /* New apRegion[] array */ - int nByte = (iRegion+1)*szRegion; /* Minimum required file size */ + int nByte = nReqRegion*szRegion; /* Minimum required file size */ struct stat sStat; /* Used by fstat() */ pShmNode->szRegion = szRegion; if( pShmNode->h>=0 ){ @@ -4390,21 +4430,23 @@ } } /* Map the requested memory region into this processes address space. */ apNew = (char **)sqlite3_realloc( - pShmNode->apRegion, (iRegion+1)*sizeof(char *) + pShmNode->apRegion, nReqRegion*sizeof(char *) ); if( !apNew ){ rc = SQLITE_IOERR_NOMEM; goto shmpage_out; } pShmNode->apRegion = apNew; - while(pShmNode->nRegion<=iRegion){ + while( pShmNode->nRegion<nReqRegion ){ + int nMap = szRegion*nShmPerMap; + int i; void *pMem; if( pShmNode->h>=0 ){ - pMem = osMmap(0, szRegion, + pMem = osMmap(0, nMap, pShmNode->isReadonly ? 
PROT_READ : PROT_READ|PROT_WRITE, MAP_SHARED, pShmNode->h, szRegion*(i64)pShmNode->nRegion ); if( pMem==MAP_FAILED ){ rc = unixLogError(SQLITE_IOERR_SHMMAP, "mmap", pShmNode->zFilename); @@ -4416,12 +4458,15 @@ rc = SQLITE_NOMEM; goto shmpage_out; } memset(pMem, 0, szRegion); } - pShmNode->apRegion[pShmNode->nRegion] = pMem; - pShmNode->nRegion++; + + for(i=0; i<nShmPerMap; i++){ + pShmNode->apRegion[pShmNode->nRegion+i] = &((char*)pMem)[szRegion*i]; + } + pShmNode->nRegion += nShmPerMap; } } shmpage_out: if( pShmNode->nRegion>iRegion ){ @@ -4631,25 +4676,10 @@ #endif } #if SQLITE_MAX_MMAP_SIZE>0 /* -** Return the system page size. -*/ -static int unixGetPagesize(void){ -#if HAVE_MREMAP - return 512; -#elif defined(_BSD_SOURCE) - return getpagesize(); -#else - return (int)sysconf(_SC_PAGESIZE); -#endif -} -#endif /* SQLITE_MAX_MMAP_SIZE>0 */ - -#if SQLITE_MAX_MMAP_SIZE>0 -/* ** Attempt to set the size of the memory mapping maintained by file ** descriptor pFd to nNew bytes. Any existing mapping is discarded. ** ** If successful, this function sets the following variables: ** @@ -4680,12 +4712,16 @@ assert( MAP_FAILED!=0 ); if( (pFd->ctrlFlags & UNIXFILE_RDONLY)==0 ) flags |= PROT_WRITE; if( pOrig ){ - const int szSyspage = unixGetPagesize(); +#if HAVE_MREMAP + i64 nReuse = pFd->mmapSize; +#else + const int szSyspage = osGetpagesize(); i64 nReuse = (pFd->mmapSize & ~(szSyspage-1)); +#endif u8 *pReq = &pOrig[nReuse]; /* Unmap any pages of the existing mapping that cannot be reused. */ if( nReuse!=nOrig ){ osMunmap(pReq, nOrig-nReuse); @@ -7427,11 +7463,11 @@ }; unsigned int i; /* Loop counter */ /* Double-check that the aSyscall[] array has been constructed ** correctly. 
See ticket [bb3a86e890c8e96ab] */ - assert( ArraySize(aSyscall)==24 ); + assert( ArraySize(aSyscall)==25 ); /* Register all VFSes defined in the aVfs[] array */ for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){ sqlite3_vfs_register(&aVfs[i], i==0); } Index: src/test_syscall.c ================================================================== --- src/test_syscall.c +++ src/test_syscall.c @@ -65,10 +65,15 @@ ** Return true if the named system call exists. Or false otherwise. ** ** test_syscall list ** Return a list of all system calls. The list is constructed using ** the xNextSystemCall() VFS method. +** +** test_syscall pagesize PGSZ +** If PGSZ is a power of two greater than 256, install a wrapper around +** OS function getpagesize() that reports the system page size as PGSZ. +** Or, if PGSZ is less than zero, remove any wrapper already installed. */ #include "sqliteInt.h" #include "sqlite3.h" #include "tcl.h" @@ -87,11 +92,13 @@ static struct TestSyscallGlobal { int bPersist; /* 1 for persistent errors, 0 for transient */ int nCount; /* Fail after this many more calls */ int nFail; /* Number of failures that have occurred */ -} gSyscall = { 0, 0 }; + int pgsz; + sqlite3_syscall_ptr orig_getpagesize; +} gSyscall = { 0, 0, 0, 0, 0 }; static int ts_open(const char *, int, int); static int ts_close(int fd); static int ts_access(const char *zPath, int mode); static char *ts_getcwd(char *zPath, size_t nPath); @@ -647,10 +654,49 @@ pVfs = sqlite3_vfs_find(0); Tcl_SetObjResult(interp, Tcl_NewStringObj(pVfs->zName, -1)); return TCL_OK; } + +static int ts_getpagesize(void){ + return gSyscall.pgsz; +} + +static int test_syscall_pagesize( + void * clientData, + Tcl_Interp *interp, + int objc, + Tcl_Obj *CONST objv[] +){ + sqlite3_vfs *pVfs = sqlite3_vfs_find(0); + int pgsz; + if( objc!=3 ){ + Tcl_WrongNumArgs(interp, 2, objv, "PGSZ"); + return TCL_ERROR; + } + if( Tcl_GetIntFromObj(interp, objv[2], &pgsz) ){ + return TCL_ERROR; + } + + if( pgsz<0 ){ + if( 
gSyscall.orig_getpagesize ){ + pVfs->xSetSystemCall(pVfs, "getpagesize", gSyscall.orig_getpagesize); + } + }else{ + if( pgsz<512 || (pgsz & (pgsz-1)) ){ + Tcl_AppendResult(interp, "pgsz out of range", 0); + return TCL_ERROR; + } + gSyscall.orig_getpagesize = pVfs->xGetSystemCall(pVfs, "getpagesize"); + gSyscall.pgsz = pgsz; + pVfs->xSetSystemCall( + pVfs, "getpagesize", (sqlite3_syscall_ptr)ts_getpagesize + ); + } + + return TCL_OK; +} static int test_syscall( void * clientData, Tcl_Interp *interp, int objc, @@ -666,10 +712,11 @@ { "reset", test_syscall_reset }, { "errno", test_syscall_errno }, { "exists", test_syscall_exists }, { "list", test_syscall_list }, { "defaultvfs", test_syscall_defaultvfs }, + { "pagesize", test_syscall_pagesize }, { 0, 0 } }; int iCmd; int rc; Index: test/syscall.test ================================================================== --- test/syscall.test +++ test/syscall.test @@ -59,10 +59,11 @@ foreach s { open close access getcwd stat fstat ftruncate fcntl read pread write pwrite fchmod fallocate pread64 pwrite64 unlink openDirectory mkdir rmdir statvfs fchown umask mmap munmap mremap + getpagesize } { if {[test_syscall exists $s]} {lappend syscall_list $s} } do_test 3.1 { lsort [test_syscall list] } [lsort $syscall_list] ADDED test/wal64k.test Index: test/wal64k.test ================================================================== --- test/wal64k.test +++ test/wal64k.test @@ -0,0 +1,47 @@ +# 2010 April 13 +# +# The author disclaims copyright to this source code. In place of +# a legal notice, here is a blessing: +# +# May you do good and not evil. +# May you find forgiveness for yourself and forgive others. +# May you share freely, never taking more than you give. +# +#*********************************************************************** +# This file implements regression tests for SQLite library. The +# focus of this file is testing the operation of the library in +# "PRAGMA journal_mode=WAL" mode. 
+# + +set testdir [file dirname $argv0] +source $testdir/tester.tcl +set testprefix wal64k + +ifcapable !wal {finish_test ; return } + +db close +test_syscall pagesize 65536 +sqlite3 db test.db + +do_execsql_test 1.0 { + PRAGMA journal_mode = WAL; + CREATE TABLE t1(x); + CREATE INDEX i1 ON t1(x); +} {wal} +do_test 1.1 { file size test.db-shm } {65536} + +do_test 1.2 { + execsql BEGIN + while {[file size test.db-shm]==65536} { + execsql { INSERT INTO t1 VALUES( randstr(900,1100) ) } + } + execsql COMMIT + file size test.db-shm +} {131072} + +integrity_check 1.3 + +db close +test_syscall pagesize -1 +finish_test +