Jan Kaluza 2d5641e
--- trunk/modules/metadata/mod_unique_id.c	2011/12/02 23:02:04	1209766
Jan Kaluza 2d5641e
+++ trunk/modules/metadata/mod_unique_id.c	2013/07/10 16:20:31	1501827
Jan Kaluza 2d5641e
@@ -31,14 +31,11 @@
Jan Kaluza 2d5641e
 #include "http_log.h"
Jan Kaluza 2d5641e
 #include "http_protocol.h"  /* for ap_hook_post_read_request */
Jan Kaluza 2d5641e
 
Jan Kaluza 2d5641e
-#if APR_HAVE_UNISTD_H
Jan Kaluza 2d5641e
-#include <unistd.h>         /* for getpid() */
Jan Kaluza 2d5641e
-#endif
Jan Kaluza 2d5641e
+#define ROOT_SIZE 10
Jan Kaluza 2d5641e
 
Jan Kaluza 2d5641e
 typedef struct {
Jan Kaluza 2d5641e
     unsigned int stamp;
Jan Kaluza 2d5641e
-    unsigned int in_addr;
Jan Kaluza 2d5641e
-    unsigned int pid;
Jan Kaluza 2d5641e
+    char root[ROOT_SIZE];
Jan Kaluza 2d5641e
     unsigned short counter;
Jan Kaluza 2d5641e
     unsigned int thread_index;
Jan Kaluza 2d5641e
 } unique_id_rec;
Jan Kaluza 2d5641e
@@ -64,20 +61,15 @@
Jan Kaluza 2d5641e
  * gethostbyname (gethostname()) is unique across all the machines at the
Jan Kaluza 2d5641e
  * "site".
Jan Kaluza 2d5641e
  *
Jan Kaluza 2d5641e
- * We also further assume that pids fit in 32-bits.  If something uses more
Jan Kaluza 2d5641e
- * than 32-bits, the fix is trivial, but it requires the unrolled uuencoding
Jan Kaluza 2d5641e
- * loop to be extended.  * A similar fix is needed to support multithreaded
Jan Kaluza 2d5641e
- * servers, using a pid/tid combo.
Jan Kaluza 2d5641e
- *
Jan Kaluza 2d5641e
- * Together, the in_addr and pid are assumed to absolutely uniquely identify
Jan Kaluza 2d5641e
- * this one child from all other currently running children on all servers
Jan Kaluza 2d5641e
- * (including this physical server if it is running multiple httpds) from each
Jan Kaluza 2d5641e
+ * The root is assumed to uniquely distinguish all currently running
Jan Kaluza 2d5641e
+ * children on all servers (including this physical server, if it is
Jan Kaluza 2d5641e
+ * running multiple httpds) from each
Jan Kaluza 2d5641e
  * other.
Jan Kaluza 2d5641e
  *
Jan Kaluza 2d5641e
- * The stamp and counter are used to distinguish all hits for a particular
Jan Kaluza 2d5641e
- * (in_addr,pid) pair.  The stamp is updated using r->request_time,
Jan Kaluza 2d5641e
- * saving cpu cycles.  The counter is never reset, and is used to permit up to
Jan Kaluza 2d5641e
- * 64k requests in a single second by a single child.
Jan Kaluza 2d5641e
+ * The stamp and counter are used to distinguish all hits for a
Jan Kaluza 2d5641e
+ * particular root.  The stamp is updated using r->request_time,
Jan Kaluza 2d5641e
+ * saving cpu cycles.  The counter is never reset, and is used to
Jan Kaluza 2d5641e
+ * permit up to 64k requests in a single second by a single child.
Jan Kaluza 2d5641e
  *
Jan Kaluza 2d5641e
  * The 144-bits of unique_id_rec are encoded using the alphabet
Jan Kaluza 2d5641e
  * [A-Za-z0-9@-], resulting in 24 bytes of printable characters.  That is then
Jan Kaluza 2d5641e
@@ -92,7 +84,7 @@
Jan Kaluza 2d5641e
  * module change.
Jan Kaluza 2d5641e
  *
Jan Kaluza 2d5641e
  * It is highly desirable that identifiers exist for "eternity".  But future
Jan Kaluza 2d5641e
- * needs (such as much faster webservers, moving to 64-bit pids, or moving to a
Jan Kaluza 2d5641e
+ * needs (such as much faster webservers, or moving to a
Jan Kaluza 2d5641e
  * multithreaded server) may dictate a need to change the contents of
Jan Kaluza 2d5641e
  * unique_id_rec.  Such a future implementation should ensure that the first
Jan Kaluza 2d5641e
  * field is still a time_t stamp.  By doing that, it is possible for a site to
Jan Kaluza 2d5641e
@@ -100,7 +92,15 @@
Jan Kaluza 2d5641e
  * wait one entire second, and then start all of their new-servers.  This
Jan Kaluza 2d5641e
  * procedure will ensure that the new space of identifiers is completely unique
Jan Kaluza 2d5641e
  * from the old space.  (Since the first four unencoded bytes always differ.)
Jan Kaluza 2d5641e
+ *
Jan Kaluza 2d5641e
+ * Note: previous implementations used 32 bits of IP address plus the
Jan Kaluza 2d5641e
+ * pid in place of the PRNG output in the "root" field.  That was
Jan Kaluza 2d5641e
+ * insufficient for IPv6-only hosts, required working DNS to determine
Jan Kaluza 2d5641e
+ * a unique IP address (fragile), and needed a sleep of up to one
Jan Kaluza 2d5641e
+ * second at startup to guard against pid reuse.  Use of the PRNG
Jan Kaluza 2d5641e
+ * avoids all of these issues.
Jan Kaluza 2d5641e
  */
Jan Kaluza 2d5641e
+
Jan Kaluza 2d5641e
 /*
Jan Kaluza 2d5641e
  * Sun Jun  7 05:43:49 CEST 1998 -- Alvaro
Jan Kaluza 2d5641e
  * More comments:
Jan Kaluza 2d5641e
@@ -116,8 +116,6 @@
Jan Kaluza 2d5641e
  * htonl/ntohl. Well, this shouldn't be a problem till year 2106.
Jan Kaluza 2d5641e
  */
Jan Kaluza 2d5641e
 
Jan Kaluza 2d5641e
-static unsigned global_in_addr;
Jan Kaluza 2d5641e
-
Jan Kaluza 2d5641e
 /*
Jan Kaluza 2d5641e
  * XXX: We should have a per-thread counter and not use cur_unique_id.counter
Jan Kaluza 2d5641e
  * XXX: in all threads, because this is bad for performance on multi-processor
Jan Kaluza 2d5641e
@@ -129,7 +127,7 @@
Jan Kaluza 2d5641e
 /*
Jan Kaluza 2d5641e
  * Number of elements in the structure unique_id_rec.
Jan Kaluza 2d5641e
  */
Jan Kaluza 2d5641e
-#define UNIQUE_ID_REC_MAX 5
Jan Kaluza 2d5641e
+#define UNIQUE_ID_REC_MAX 4
Jan Kaluza 2d5641e
 
Jan Kaluza 2d5641e
 static unsigned short unique_id_rec_offset[UNIQUE_ID_REC_MAX],
Jan Kaluza 2d5641e
                       unique_id_rec_size[UNIQUE_ID_REC_MAX],
Jan Kaluza 2d5641e
@@ -138,113 +136,32 @@
Jan Kaluza 2d5641e
 
Jan Kaluza 2d5641e
 static int unique_id_global_init(apr_pool_t *p, apr_pool_t *plog, apr_pool_t *ptemp, server_rec *main_server)
Jan Kaluza 2d5641e
 {
Jan Kaluza 2d5641e
-    char str[APRMAXHOSTLEN + 1];
Jan Kaluza 2d5641e
-    apr_status_t rv;
Jan Kaluza 2d5641e
-    char *ipaddrstr;
Jan Kaluza 2d5641e
-    apr_sockaddr_t *sockaddr;
Jan Kaluza 2d5641e
-
Jan Kaluza 2d5641e
     /*
Jan Kaluza 2d5641e
      * Calculate the sizes and offsets in cur_unique_id.
Jan Kaluza 2d5641e
      */
Jan Kaluza 2d5641e
     unique_id_rec_offset[0] = APR_OFFSETOF(unique_id_rec, stamp);
Jan Kaluza 2d5641e
     unique_id_rec_size[0] = sizeof(cur_unique_id.stamp);
Jan Kaluza 2d5641e
-    unique_id_rec_offset[1] = APR_OFFSETOF(unique_id_rec, in_addr);
Jan Kaluza 2d5641e
-    unique_id_rec_size[1] = sizeof(cur_unique_id.in_addr);
Jan Kaluza 2d5641e
-    unique_id_rec_offset[2] = APR_OFFSETOF(unique_id_rec, pid);
Jan Kaluza 2d5641e
-    unique_id_rec_size[2] = sizeof(cur_unique_id.pid);
Jan Kaluza 2d5641e
-    unique_id_rec_offset[3] = APR_OFFSETOF(unique_id_rec, counter);
Jan Kaluza 2d5641e
-    unique_id_rec_size[3] = sizeof(cur_unique_id.counter);
Jan Kaluza 2d5641e
-    unique_id_rec_offset[4] = APR_OFFSETOF(unique_id_rec, thread_index);
Jan Kaluza 2d5641e
-    unique_id_rec_size[4] = sizeof(cur_unique_id.thread_index);
Jan Kaluza 2d5641e
+    unique_id_rec_offset[1] = APR_OFFSETOF(unique_id_rec, root);
Jan Kaluza 2d5641e
+    unique_id_rec_size[1] = sizeof(cur_unique_id.root);
Jan Kaluza 2d5641e
+    unique_id_rec_offset[2] = APR_OFFSETOF(unique_id_rec, counter);
Jan Kaluza 2d5641e
+    unique_id_rec_size[2] = sizeof(cur_unique_id.counter);
Jan Kaluza 2d5641e
+    unique_id_rec_offset[3] = APR_OFFSETOF(unique_id_rec, thread_index);
Jan Kaluza 2d5641e
+    unique_id_rec_size[3] = sizeof(cur_unique_id.thread_index);
Jan Kaluza 2d5641e
     unique_id_rec_total_size = unique_id_rec_size[0] + unique_id_rec_size[1] +
Jan Kaluza 2d5641e
-                               unique_id_rec_size[2] + unique_id_rec_size[3] +
Jan Kaluza 2d5641e
-                               unique_id_rec_size[4];
Jan Kaluza 2d5641e
+                               unique_id_rec_size[2] + unique_id_rec_size[3];
Jan Kaluza 2d5641e
 
Jan Kaluza 2d5641e
     /*
Jan Kaluza 2d5641e
      * Calculate the size of the structure when encoded.
Jan Kaluza 2d5641e
      */
Jan Kaluza 2d5641e
     unique_id_rec_size_uu = (unique_id_rec_total_size*8+5)/6;
Jan Kaluza 2d5641e
 
Jan Kaluza 2d5641e
-    /*
Jan Kaluza 2d5641e
-     * Now get the global in_addr.  Note that it is not sufficient to use one
Jan Kaluza 2d5641e
-     * of the addresses from the main_server, since those aren't as likely to
Jan Kaluza 2d5641e
-     * be unique as the physical address of the machine
Jan Kaluza 2d5641e
-     */
Jan Kaluza 2d5641e
-    if ((rv = apr_gethostname(str, sizeof(str) - 1, p)) != APR_SUCCESS) {
Jan Kaluza 2d5641e
-        ap_log_error(APLOG_MARK, APLOG_ALERT, rv, main_server, APLOGNO(01563)
Jan Kaluza 2d5641e
-          "unable to find hostname of the server");
Jan Kaluza 2d5641e
-        return HTTP_INTERNAL_SERVER_ERROR;
Jan Kaluza 2d5641e
-    }
Jan Kaluza 2d5641e
-
Jan Kaluza 2d5641e
-    if ((rv = apr_sockaddr_info_get(&sockaddr, str, AF_INET, 0, 0, p)) == APR_SUCCESS) {
Jan Kaluza 2d5641e
-        global_in_addr = sockaddr->sa.sin.sin_addr.s_addr;
Jan Kaluza 2d5641e
-    }
Jan Kaluza 2d5641e
-    else {
Jan Kaluza 2d5641e
-        ap_log_error(APLOG_MARK, APLOG_ALERT, rv, main_server, APLOGNO(01564)
Jan Kaluza 2d5641e
-                    "unable to find IPv4 address of \"%s\"", str);
Jan Kaluza 2d5641e
-#if APR_HAVE_IPV6
Jan Kaluza 2d5641e
-        if ((rv = apr_sockaddr_info_get(&sockaddr, str, AF_INET6, 0, 0, p)) == APR_SUCCESS) {
Jan Kaluza 2d5641e
-            memcpy(&global_in_addr,
Jan Kaluza 2d5641e
-                   (char *)sockaddr->ipaddr_ptr + sockaddr->ipaddr_len - sizeof(global_in_addr),
Jan Kaluza 2d5641e
-                   sizeof(global_in_addr));
Jan Kaluza 2d5641e
-            ap_log_error(APLOG_MARK, APLOG_ALERT, rv, main_server, APLOGNO(01565)
Jan Kaluza 2d5641e
-                         "using low-order bits of IPv6 address "
Jan Kaluza 2d5641e
-                         "as if they were unique");
Jan Kaluza 2d5641e
-        }
Jan Kaluza 2d5641e
-        else
Jan Kaluza 2d5641e
-#endif
Jan Kaluza 2d5641e
-        return HTTP_INTERNAL_SERVER_ERROR;
Jan Kaluza 2d5641e
-    }
Jan Kaluza 2d5641e
-
Jan Kaluza 2d5641e
-    apr_sockaddr_ip_get(&ipaddrstr, sockaddr);
Jan Kaluza 2d5641e
-    ap_log_error(APLOG_MARK, APLOG_INFO, 0, main_server, APLOGNO(01566) "using ip addr %s",
Jan Kaluza 2d5641e
-                 ipaddrstr);
Jan Kaluza 2d5641e
-
Jan Kaluza 2d5641e
-    /*
Jan Kaluza 2d5641e
-     * If the server is pummelled with restart requests we could possibly end
Jan Kaluza 2d5641e
-     * up in a situation where we're starting again during the same second
Jan Kaluza 2d5641e
-     * that has been used in previous identifiers.  Avoid that situation.
Jan Kaluza 2d5641e
-     *
Jan Kaluza 2d5641e
-     * In truth, for this to actually happen not only would it have to restart
Jan Kaluza 2d5641e
-     * in the same second, but it would have to somehow get the same pids as
Jan Kaluza 2d5641e
-     * one of the other servers that was running in that second. Which would
Jan Kaluza 2d5641e
-     * mean a 64k wraparound on pids ... not very likely at all.
Jan Kaluza 2d5641e
-     *
Jan Kaluza 2d5641e
-     * But protecting against it is relatively cheap.  We just sleep into the
Jan Kaluza 2d5641e
-     * next second.
Jan Kaluza 2d5641e
-     */
Jan Kaluza 2d5641e
-    apr_sleep(apr_time_from_sec(1) - apr_time_usec(apr_time_now()));
Jan Kaluza 2d5641e
     return OK;
Jan Kaluza 2d5641e
 }
Jan Kaluza 2d5641e
 
Jan Kaluza 2d5641e
 static void unique_id_child_init(apr_pool_t *p, server_rec *s)
Jan Kaluza 2d5641e
 {
Jan Kaluza 2d5641e
-    pid_t pid;
Jan Kaluza 2d5641e
-
Jan Kaluza 2d5641e
-    /*
Jan Kaluza 2d5641e
-     * Note that we use the pid because it's possible that on the same
Jan Kaluza 2d5641e
-     * physical machine there are multiple servers (i.e. using Listen). But
Jan Kaluza 2d5641e
-     * it's guaranteed that none of them will share the same pids between
Jan Kaluza 2d5641e
-     * children.
Jan Kaluza 2d5641e
-     *
Jan Kaluza 2d5641e
-     * XXX: for multithread this needs to use a pid/tid combo and probably
Jan Kaluza 2d5641e
-     * needs to be expanded to 32 bits
Jan Kaluza 2d5641e
-     */
Jan Kaluza 2d5641e
-    pid = getpid();
Jan Kaluza 2d5641e
-    cur_unique_id.pid = pid;
Jan Kaluza 2d5641e
-
Jan Kaluza 2d5641e
-    /*
Jan Kaluza 2d5641e
-     * Test our assumption that the pid is 32-bits.  It's possible that
Jan Kaluza 2d5641e
-     * 64-bit machines will declare pid_t to be 64 bits but only use 32
Jan Kaluza 2d5641e
-     * of them.  It would have been really nice to test this during
Jan Kaluza 2d5641e
-     * global_init ... but oh well.
Jan Kaluza 2d5641e
-     */
Jan Kaluza 2d5641e
-    if ((pid_t)cur_unique_id.pid != pid) {
Jan Kaluza 2d5641e
-        ap_log_error(APLOG_MARK, APLOG_CRIT, 0, s, APLOGNO(01567)
Jan Kaluza 2d5641e
-                    "oh no! pids are greater than 32-bits!  I'm broken!");
Jan Kaluza 2d5641e
-    }
Jan Kaluza 2d5641e
-
Jan Kaluza 2d5641e
-    cur_unique_id.in_addr = global_in_addr;
Jan Kaluza 2d5641e
+    ap_random_insecure_bytes(&cur_unique_id.root,
Jan Kaluza 2d5641e
+                             sizeof(cur_unique_id.root));
Jan Kaluza 2d5641e
 
Jan Kaluza 2d5641e
     /*
Jan Kaluza 2d5641e
      * If we use 0 as the initial counter we have a little less protection
Jan Kaluza 2d5641e
@@ -253,13 +170,6 @@
Jan Kaluza 2d5641e
      */
Jan Kaluza 2d5641e
     ap_random_insecure_bytes(&cur_unique_id.counter,
Jan Kaluza 2d5641e
                              sizeof(cur_unique_id.counter));
Jan Kaluza 2d5641e
-
Jan Kaluza 2d5641e
-    /*
Jan Kaluza 2d5641e
-     * We must always use network ordering for these bytes, so that
Jan Kaluza 2d5641e
-     * identifiers are comparable between machines of different byte
Jan Kaluza 2d5641e
-     * orderings.  Note in_addr is already in network order.
Jan Kaluza 2d5641e
-     */
Jan Kaluza 2d5641e
-    cur_unique_id.pid = htonl(cur_unique_id.pid);
Jan Kaluza 2d5641e
 }
Jan Kaluza 2d5641e
 
Jan Kaluza 2d5641e
 /* NOTE: This is *NOT* the same encoding used by base64encode ... the last two
Jan Kaluza 2d5641e
@@ -291,10 +201,8 @@
Jan Kaluza 2d5641e
     unsigned short counter;
Jan Kaluza 2d5641e
     int i,j,k;
Jan Kaluza 2d5641e
 
Jan Kaluza 2d5641e
-    new_unique_id.in_addr = cur_unique_id.in_addr;
Jan Kaluza 2d5641e
-    new_unique_id.pid = cur_unique_id.pid;
Jan Kaluza 2d5641e
+    memcpy(&new_unique_id.root, &cur_unique_id.root, ROOT_SIZE);
Jan Kaluza 2d5641e
     new_unique_id.counter = cur_unique_id.counter;
Jan Kaluza 2d5641e
-
Jan Kaluza 2d5641e
     new_unique_id.stamp = htonl((unsigned int)apr_time_sec(r->request_time));
Jan Kaluza 2d5641e
     new_unique_id.thread_index = htonl((unsigned int)r->connection->id);
Jan Kaluza 2d5641e