33 * sysv_shmem.c
44 * Implement shared memory using SysV facilities
55 *
6- * These routines represent a fairly thin layer on top of SysV shared
7- * memory functionality.
6+ * These routines used to be a fairly thin layer on top of SysV shared
7+ * memory functionality. With the addition of anonymous-shmem logic,
8+ * they're a bit fatter now. We still require a SysV shmem block to
9+ * exist, though, because mmap'd shmem provides no way to find out how
10+ * many processes are attached, which we need for interlocking purposes.
811 *
912 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
1013 * Portions Copyright (c) 1994, Regents of the University of California
3639#include "utils/guc.h"
3740
3841
42+ /*
43+ * As of PostgreSQL 9.3, we normally allocate only a very small amount of
44+ * System V shared memory, and only for the purposes of providing an
45+ * interlock to protect the data directory. The real shared memory block
46+ * is allocated using mmap(). This works around the problem that many
47+ * systems have very low limits on the amount of System V shared memory
48+ * that can be allocated. Even a limit of a few megabytes will be enough
49+ * to run many copies of PostgreSQL without needing to adjust system settings.
50+ *
51+ * We assume that no one will attempt to run PostgreSQL 9.3 or later on
52+ * systems that are ancient enough that anonymous shared memory is not
53+ * supported, such as pre-2.4 versions of Linux. If that turns out to be
54+ * false, we might need to add compile and/or run-time tests here and do this
55+ * only if the running kernel supports it.
56+ *
57+ * However, we must always disable this logic in the EXEC_BACKEND case, and
58+ * fall back to the old method of allocating the entire segment using System V
59+ * shared memory, because there's no way to attach an anonymous mmap'd segment
60+ * to a process after exec(). Since EXEC_BACKEND is intended only for
61+ * developer use, this shouldn't be a big problem. Because of this, we do
62+ * not worry about supporting anonymous shmem in the EXEC_BACKEND cases below.
63+ */
64+ #ifndef EXEC_BACKEND
65+ #define USE_ANONYMOUS_SHMEM
66+ #endif
67+
68+
3969typedef key_t IpcMemoryKey ; /* shared memory key passed to shmget(2) */
4070typedef int IpcMemoryId ; /* shared memory ID returned by shmget(2) */
4171
4272
4373unsigned long UsedShmemSegID = 0 ;
4474void * UsedShmemSegAddr = NULL ;
75+
76+ #ifdef USE_ANONYMOUS_SHMEM
4577static Size AnonymousShmemSize ;
4678static void * AnonymousShmem = NULL ;
79+ #endif
4780
4881static void * InternalIpcMemoryCreate (IpcMemoryKey memKey , Size size );
4982static void IpcMemoryDetach (int status , Datum shmaddr );
@@ -204,10 +237,6 @@ IpcMemoryDetach(int status, Datum shmaddr)
204237 /* Detach System V shared memory block. */
205238 if (shmdt (DatumGetPointer (shmaddr )) < 0 )
206239 elog (LOG , "shmdt(%p) failed: %m" , DatumGetPointer (shmaddr ));
207- /* Release anonymous shared memory block, if any. */
208- if (AnonymousShmem != NULL
209- && munmap (AnonymousShmem , AnonymousShmemSize ) < 0 )
210- elog (LOG , "munmap(%p) failed: %m" , AnonymousShmem );
211240}
212241
213242/****************************************************************************/
@@ -318,14 +347,15 @@ PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
318347 return true;
319348}
320349
350+ #ifdef USE_ANONYMOUS_SHMEM
351+
321352/*
322353 * Creates an anonymous mmap()ed shared memory segment.
323354 *
324355 * Pass the requested size in *size. This function will modify *size to the
325356 * actual size of the allocation, if it ends up allocating a segment that is
326357 * larger than requested.
327358 */
328- #ifndef EXEC_BACKEND
329359static void *
330360CreateAnonymousSegment (Size * size )
331361{
@@ -334,10 +364,8 @@ CreateAnonymousSegment(Size *size)
334364 int mmap_errno = 0 ;
335365
336366#ifndef MAP_HUGETLB
337- if (huge_pages == HUGE_PAGES_ON )
338- ereport (ERROR ,
339- (errcode (ERRCODE_FEATURE_NOT_SUPPORTED ),
340- errmsg ("huge TLB pages not supported on this platform" )));
367+ /* PGSharedMemoryCreate should have dealt with this case */
368+ Assert (huge_pages != HUGE_PAGES_ON );
341369#else
342370 if (huge_pages == HUGE_PAGES_ON || huge_pages == HUGE_PAGES_TRY )
343371 {
@@ -366,12 +394,12 @@ CreateAnonymousSegment(Size *size)
366394 PG_MMAP_FLAGS | MAP_HUGETLB , -1 , 0 );
367395 mmap_errno = errno ;
368396 if (huge_pages == HUGE_PAGES_TRY && ptr == MAP_FAILED )
369- elog (DEBUG1 , "mmap with MAP_HUGETLB failed, huge pages disabled: %m" );
397+ elog (DEBUG1 , "mmap(%zu) with MAP_HUGETLB failed, huge pages disabled: %m" ,
398+ allocsize );
370399 }
371400#endif
372401
373- if (huge_pages == HUGE_PAGES_OFF ||
374- (huge_pages == HUGE_PAGES_TRY && ptr == MAP_FAILED ))
402+ if (ptr == MAP_FAILED && huge_pages != HUGE_PAGES_ON )
375403 {
376404 /*
377405 * use the original size, not the rounded up value, when falling back
@@ -401,7 +429,25 @@ CreateAnonymousSegment(Size *size)
401429 * size = allocsize ;
402430 return ptr ;
403431}
404- #endif
432+
433+ /*
434+ * AnonymousShmemDetach --- detach from an anonymous mmap'd block
435+ * (called as an on_shmem_exit callback, hence funny argument list)
436+ */
437+ static void
438+ AnonymousShmemDetach (int status , Datum arg )
439+ {
440+ /* Release anonymous shared memory block, if any. */
441+ if (AnonymousShmem != NULL )
442+ {
443+ if (munmap (AnonymousShmem , AnonymousShmemSize ) < 0 )
444+ elog (LOG , "munmap(%p, %zu) failed: %m" ,
445+ AnonymousShmem , AnonymousShmemSize );
446+ AnonymousShmem = NULL ;
447+ }
448+ }
449+
450+ #endif /* USE_ANONYMOUS_SHMEM */
405451
406452/*
407453 * PGSharedMemoryCreate
@@ -432,7 +478,8 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port,
432478 struct stat statbuf ;
433479 Size sysvsize ;
434480
435- #if defined(EXEC_BACKEND ) || !defined(MAP_HUGETLB )
481+ /* Complain if hugepages demanded but we can't possibly support them */
482+ #if !defined(USE_ANONYMOUS_SHMEM ) || !defined(MAP_HUGETLB )
436483 if (huge_pages == HUGE_PAGES_ON )
437484 ereport (ERROR ,
438485 (errcode (ERRCODE_FEATURE_NOT_SUPPORTED ),
@@ -442,32 +489,13 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port,
442489 /* Room for a header? */
443490 Assert (size > MAXALIGN (sizeof (PGShmemHeader )));
444491
445- /*
446- * As of PostgreSQL 9.3, we normally allocate only a very small amount of
447- * System V shared memory, and only for the purposes of providing an
448- * interlock to protect the data directory. The real shared memory block
449- * is allocated using mmap(). This works around the problem that many
450- * systems have very low limits on the amount of System V shared memory
451- * that can be allocated. Even a limit of a few megabytes will be enough
452- * to run many copies of PostgreSQL without needing to adjust system
453- * settings.
454- *
455- * We assume that no one will attempt to run PostgreSQL 9.3 or later on
456- * systems that are ancient enough that anonymous shared memory is not
457- * supported, such as pre-2.4 versions of Linux. If that turns out to be
458- * false, we might need to add a run-time test here and do this only if
459- * the running kernel supports it.
460- *
461- * However, we disable this logic in the EXEC_BACKEND case, and fall back
462- * to the old method of allocating the entire segment using System V
463- * shared memory, because there's no way to attach an mmap'd segment to a
464- * process after exec(). Since EXEC_BACKEND is intended only for
465- * developer use, this shouldn't be a big problem.
466- */
467- #ifndef EXEC_BACKEND
492+ #ifdef USE_ANONYMOUS_SHMEM
468493 AnonymousShmem = CreateAnonymousSegment (& size );
469494 AnonymousShmemSize = size ;
470495
496+ /* Register on-exit routine to unmap the anonymous segment */
497+ on_shmem_exit (AnonymousShmemDetach , (Datum ) 0 );
498+
471499 /* Now we need only allocate a minimal-sized SysV shmem block. */
472500 sysvsize = sizeof (PGShmemHeader );
473501#else
@@ -572,10 +600,14 @@ PGSharedMemoryCreate(Size size, bool makePrivate, int port,
572600 * block. Otherwise, the System V shared memory block is only a shim, and
573601 * we must return a pointer to the real block.
574602 */
603+ #ifdef USE_ANONYMOUS_SHMEM
575604 if (AnonymousShmem == NULL )
576605 return hdr ;
577606 memcpy (AnonymousShmem , hdr , sizeof (PGShmemHeader ));
578607 return (PGShmemHeader * ) AnonymousShmem ;
608+ #else
609+ return hdr ;
610+ #endif
579611}
580612
581613#ifdef EXEC_BACKEND
@@ -660,12 +692,12 @@ PGSharedMemoryNoReAttach(void)
660692 *
661693 * Detach from the shared memory segment, if still attached. This is not
662694 * intended to be called explicitly by the process that originally created the
663- * segment (it will have an on_shmem_exit callback registered to do that).
695+ * segment (it will have on_shmem_exit callback(s) registered to do that).
664696 * Rather, this is for subprocesses that have inherited an attachment and want
665697 * to get rid of it.
666698 *
667699 * UsedShmemSegID and UsedShmemSegAddr are implicit parameters to this
668- * routine.
700+ * routine, also AnonymousShmem and AnonymousShmemSize .
669701 */
670702void
671703PGSharedMemoryDetach (void )
@@ -682,10 +714,15 @@ PGSharedMemoryDetach(void)
682714 UsedShmemSegAddr = NULL ;
683715 }
684716
685- /* Release anonymous shared memory block, if any. */
686- if (AnonymousShmem != NULL
687- && munmap (AnonymousShmem , AnonymousShmemSize ) < 0 )
688- elog (LOG , "munmap(%p) failed: %m" , AnonymousShmem );
717+ #ifdef USE_ANONYMOUS_SHMEM
718+ if (AnonymousShmem != NULL )
719+ {
720+ if (munmap (AnonymousShmem , AnonymousShmemSize ) < 0 )
721+ elog (LOG , "munmap(%p, %zu) failed: %m" ,
722+ AnonymousShmem , AnonymousShmemSize );
723+ AnonymousShmem = NULL ;
724+ }
725+ #endif
689726}
690727
691728
0 commit comments