A Scheme program for Emil Post's game of tag

emil.s
by Brian Hayes
211 Dacian Avenue
Durham, NC 27701

bhayes@amsci.org


;;; a collection of programs for working with Post tag systems
;;; optimized for cataloguing for the periods of cyclic sequences

;;; This was written and tested with the MacGambit Scheme compiler,
;;; but it should run with little modification on other Scheme
;;; systems that conform to the IEEE or R4RS standards. The only
;;; MacGambit idiosyncrasies--as far as I know--are the declarations
;;; at the head of the file (which should affect only efficiency
;;; and can be deleted) and the ##fixnum.logand procedure, which
;;; is used in three places as a shortcut method of doing modular
;;; addition. Some other Scheme implementations include equivalent
;;; bitwise logical operators, although they are likely to be named
;;; differently. If no 'logand' is available, it will have to be
;;; replaced with a calculation using the 'modulo' operator. For
;;; example, the 'q++' procedure could be rewritten as:
;;;
;;;    (define q++ 
;;;      (lambda (ptr)
;;;        (modulo (+ ptr 1) max-q)))

;;; The basic architecture of the program is based on a queue,
;;; which holds an encoded form of the evolving tag sequence. The
;;; queue in turn is implemented by a Scheme vector, with auxiliary
;;; variables that point to the current head and tail of the queue.
;;; When terms are deleted from the head of the queue and appended
;;; to the tail, nothing moves in memory; instead the head and tail
;;; pointers are updated. (In some instances it is more convenient
;;; to keep track of the queue length instead of the queue tail, and
;;; so a 'q-len' variable is also maintained.

;;; The queue itself and its various auxiliary pointers are defined
;;; as global variables so as to avoid having to pass them as 
;;; arguments. This was a misguided design decision, intended to
;;; minimize garbage collection; I would do it differently next time.

;;; I also have doubts about the complex encoding of the binary
;;; sequence (see below for an explanation). The encoding exploits
;;; the fact that only every third digit in a sequence can influence
;;; the evolution of the system, since only digits 1, 4, 7, 10... can
;;; come to the head of the sequence. Hence there is no point in
;;; storing the intermediate digits. The space savings is nontrivial
;;; with sequences that grow to 50,000 or more digits, but IÍm not
;;; sure about the effect on execution speed.

;;; A great deal more space could be saved by encoding the tag
;;; sequence in machine bits, which would also allow the use of
;;; hardware shift operations to implement the basic tag rule. But
;;; the bother of masking and extracting individual bits might well
;;; have a high cost. Experiments are needed.



;;; compiler declarations for maximum speed, minimum safety

(##declare
  (extended-bindings)
  (not safe)
  (block)
  (fixnum))


;;; Sequences are represented by means of a two-part
;;; notation. Within the sequence vector, the first 3k
;;; entries are compressed according to the following
;;; simple mapping:
;;;                     0XX --> 0
;;;                     1XX --> 1
;;;
;;; where "X" is either 0 or 1. In addition, the remainder
;;; of the sequence (which can consist or 0, 1 or 2 entries)
;;; is encoded in an integer-valued state variable, according
;;; to this mapping:
;;;                     nil --> 0
;;;                      0  --> 1
;;;                      1  --> 2
;;;                      00 --> 3
;;;                      01 --> 4
;;;                      10 --> 5
;;;                      11 --> 6

;;; The transformation of the compacted sequence is handled
;;; by the following transition table:

;
;   STATE TRANSITION TABLE
;
;  head   state    tail          state    append
;
;  0      0        ()     -->    3        ()
;  0      1        0      -->    0        0
;  0      2        1      -->    0        1
;  0      3        00     -->    1        0
;  0      4        01     -->    1        0
;  0      5        10     -->    1        1
;  0      6        11     -->    1        1
;  
;  1      0        ()     -->    2        1
;  1      1        0      -->    4        0
;  1      2        1      -->    4        1
;  1      3        00     -->    0        0
;  1      4        01     -->    0        0
;  1      5        10     -->    0        1
;  1      6        11     -->    0        1

(define map-state
  (let ((vec (vector '(3)         ;state 0-0
                     '(0 z)       ;state 0-1
                     '(0 i)       ;state 0-2
                     '(1 z)       ;state 0-3
                     '(1 z)       ;state 0-4
                     '(1 i)       ;state 0-5
                     '(1 i)       ;state 0-6
                     '(2 i)       ;state 1-0
                     '(4 z)       ;state 1-1
                     '(4 i)       ;state 1-2
                     '(0 z i)     ;state 1-3
                     '(0 z i)     ;state 1-4
                     '(0 i i)     ;state 1-5
                     '(0 i i))))  ;state 1-6
    (lambda (head state)
      (vector-ref vec (+ (* 7 head) state)))))
      




;;; global variables that implement
;;; the tag queue

(define max-q (expt 2 13))   ;;; NOTE: must be power of 2

(define seq (make-vector max-q))

(define q-mask (- max-q 1))

(define q-state #f)

(define qh #f)               ;;; head pointer

(define q-len #f)            ;;; queue length

(define seq-len #f)          ;;; sequence length /= queue length



;;; procedures for manipulating the queue pointers
;;; Note that the queue length is the primary variable;
;;; the queue tail is calculated from it. Also note that
;;; with the tag-system encoding used here, there is
;;; never a need to change a queue pointer by more than
;;; +/- 1, and so only increment and decrement operations
;;; are defined.

(define qt                   ;;; calculate tail pointer
  (lambda ()
    (##fixnum.logand (+ qh (- q-len 1)) q-mask)))

(define q++                  ;;; increment a queue pointer
  (lambda (ptr)
    (##fixnum.logand (+ ptr 1) q-mask)))
               
(define q--                  ;;; decrement 
  (lambda (ptr)
    (##fixnum.logand (- ptr 1) q-mask)))



;;; some miscellaneous utility procedures and synonyms
(define vset! vector-set!)

(define vref vector-ref)

(define xor
  (lambda (a b)
    (if a (not b) b)))

(define show
  (lambda args
    (let loop ((args args))
      (if (null? args)
          (newline)
          (begin
            (display (car args))
            (display "  ")
            (loop (cdr args)))))))
            


;;; It's a convenience to write sequences in a compact
;;; notation where 'I' = '1XX' and 'Z' = '0XX'. In other
;;; words, a sequence such as
;;;
;;;     10000010010000010010010
;;;
;;; is reduced to
;;;
;;;     I  Z  I  I  Z  I  I  10
;;;
;;; The two procedures below convert between 'IZ' notation
;;; and the standard '01' notation.


(define izzify
  (lambda (lis)
    (let loop ((numlis lis) (izlis '() ))
      (if (< (length numlis) 3)
          (append (reverse izlis) numlis)
          (loop (cdddr numlis)
                (cons (if (zero? (car numlis)) 'z 'i) izlis))))))

(define de-izzify
  (lambda (lis)
    (let loop ((izlis lis) (numlis '()))
      (if (null? izlis)
          (reverse numlis)
          (loop (cdr izlis)
                (case (car izlis)
                  ((i) (append '(0 0 1) numlis))
                  ((z) (append '(0 0 0) numlis))
                  ((1) (cons 1 numlis))
                  ((0) (cons 0 numlis))
                  (else (cons #\? numlis))))))))



;;; And these procedures display the content of a queue
;;; in either 'IZ' or conventional notation.

(define display-q-full
  (lambda ()
    (let loop ((qh qh) (q-len q-len))
      (if (not (zero? q-len))
          (begin
            (case (vector-ref seq qh)
              ((z) (display "000"))
              ((i) (display "100"))
              (else (display "???")))
            (loop (q++ qh) (- q-len 1)))))
    (display (vector-ref 
               (vector " " "0" "1" "00" "01" "10" "11") q-state))
    (newline)))


(define display-q-IZ
  (lambda ()
    (let loop ((qh qh) (q-len q-len))
      (if (not (zero? q-len))
          (begin
            (case (vector-ref seq qh)
              ((z) (display "Z"))
              ((i) (display "I"))
              (else (display "?")))
            (loop (q++ qh) (- q-len 1)))))
    (display (vector-ref 
               (vector " " "0" "1" "00" "01" "10" "11") q-state))
    (newline)))
    
    

;;; This is the initialization routine that sets up the
;;; initial queue state, handling the special encoding.
;;; The argument to 'setup-q' can be in either '10' or
;;; 'IZ' notation.
                 
(define setup-q
  (lambda (starting-seq)
    (set! qh 0)
    (set! q-state 0)
    (let loop ((qt 0) (ql 0) (sl 0) (seq-lis starting-seq))
      (cond ((null? seq-lis)
             (set! q-len ql)
             (set! seq-len sl))
            ((member (car seq-lis) '(i "I" "i" #\i #\I))
             (vector-set! seq qt 'i)
             (loop (q++ qt) (+ ql 1) (+ sl 3) (cdr seq-lis)))
            ((member (car seq-lis) '(z "Z" "z" #\z #\Z))
             (vector-set! seq qt 'z)
             (loop (q++ qt) (+ ql 1) (+ sl 3) (cdr seq-lis)))
            ((and (number? (car seq-lis)) (zero? (car seq-lis)))
             (case q-state
               ((0) (set! q-state 1)
                    (loop qt ql (+ sl 1) (cdr seq-lis)))
               ((1) (set! q-state 3)
                    (loop qt ql (+ sl 1) (cdr seq-lis)))
               ((2) (set! q-state 5)
                    (loop qt ql (+ sl 1) (cdr seq-lis)))
               (else (error "Malformed starting sequence: " starting-seq))))
            ((and (number? (car seq-lis)) (= (car seq-lis) 1))
             (case q-state
               ((0) (set! q-state 2)
                    (loop qt ql (+ sl 1) (cdr seq-lis)))
               ((1) (set! q-state 4)
                    (loop qt ql (+ sl 1) (cdr seq-lis)))
               ((2) (set! q-state 6)
                    (loop qt ql (+ sl 1) (cdr seq-lis)))
               (else (error "Malformed starting sequence: " starting-seq))))
            (else (error "Malformed starting sequence: " starting-seq))))))
            
                  

;;; This is what it takes to count by ones when a sequence
;;; is encoded in the 'IZ' notation. 'next-seq' is used when
;;; scanning a range of starting sequences to calculate the
;;; next sequence to be examined. The result it returns will
;;; then be installed in the queue by 'setup-q'.

(define next-seq
  (lambda (izlis)
    (let loop ((old (reverse izlis)) (new '()) (carry #f))
      (cond ((and (null? old) (not carry)) new)
            ((and (null? old) carry) (cons 'i new))
            ((and (number? (car old)) (number? (cadr old)))
             (if (zero? (cadr old))
                 (reverse (append '(0 1) (cddr old)))
                 (loop (cddr old) new #t)))
            ((number? (car old))
             (if (zero? (car old))
                 (reverse (cons '1 (cdr old)))
                 (reverse (append '(0 0) (cdr old)))))
            ((and (null? new)(not carry)) (reverse (cons 0 old)))
            (else (loop (cdr old)
                        (cons (if (xor (eq? (car old) 'i) carry) 'i 'z) new)
                        (and (eq? (car old) 'i) carry)))))))
                 
                  


;;; 'emil-step' is the procedure that implements the
;;; fundamental tag-system rule. 

(define emil-step
  (lambda ()
    (let* ((head (vector-ref seq qh))
           (next (map-state (if (eq? head 'z) 0 1) q-state)))
      (set! q-state (car next))
      (set! qh (q++ qh))
      (set! q-len (- q-len 1))
      (set! seq-len (+ seq-len (if (eq? head 'z) -1 1))) 
      (let loop ((addenda (cdr next)))
        (if (null? addenda)
            '()
            (begin
              (set! q-len (+ q-len 1))
              (vector-set! seq (qt) (car addenda))
              (loop (cdr addenda))))))))
            

;;; 'emil-fate' calls 'emil-step' repeatedly until it can
;;; eventually resolve the outcome of a sequence--whether it
;;; enters a cycle or dwindles away. The search is called
;;; off if the number of iterations exceeds the argument
;;; 'count-limit' or if the sequence grows to fill up the
;;; queue. The value returned is a list made up of the
;;; following elements:
;;;
;;;    an 'IZ' string representation of the starting sequence
;;;    a keyword indicating the fate of the sequence
;;;    the number of iterations need to settle the fate
;;;    the period of the cycle (or 0 if no cycle was detected)
;;;    the maximum length of the sequence
;;;
;;; The keyword in the cadr of this list has the possible
;;; values 'cycle', 'dwindle', 'over-count' and 'over-length'.

;;; Most of the complication here is connected with the need to check
;;; for repeated sequence values in order to detect the onset
;;; of a cycle. This is done with a rather crude method, which
;;; ought to be improved. (The optimum algorithm, devised by
;;; R. W. Gosper, is given in Vol. 2 of KnuthÍs Art of Computer
;;; Programming.

(define emil-fate
  (lambda (chk-interval count-limit)
    (let* ((init-seq (get-seq))
           (chk-seq-old init-seq)
           (chk-count-old 0)
           (chk-seq-new init-seq)
           (chk-count-new 0)
           (chk-state-new q-state)
           (chk-len seq-len)
           (disp-seq (compactify init-seq)))
      (let loop ((count 0) (max-len seq-len))
        (if (= (modulo count chk-interval) 0)
            (begin
              (set! chk-seq-old chk-seq-new)
              (set! chk-seq-new (get-seq))
              (set! chk-count-old chk-count-new)
              (set! chk-count-new count)
              (set! chk-state-new q-state)
              (set! chk-len seq-len)))
        (emil-step)
        (cond ((> count count-limit)
               (list disp-seq 'over-count count 0 max-len))
              ((> q-len (- max-q 5))
               (list disp-seq 'over-length count 0 max-len))
              ((< seq-len 3)
               (list disp-seq 'dwindle count 0 max-len))
              ((and (= seq-len chk-len)
                    (= q-state chk-state-new)
                    (equal-seq? chk-seq-new))
               (let ((period (- count chk-count-new -1)))
                 (list disp-seq 
                       'cycle 
                       (find-start-of-cycle chk-seq-old
                                            chk-count-old
                                            period
                                            count)
                       period
                       max-len)))
              (else (loop (+ count 1) (max max-len seq-len))))))))



;;; an equality predicate for tag sequences. It compares one
;;; sequence in list form with the global queue. This is pretty
;;; crufty, and it was when I got to this point that I began
;;; wishing I had not used all those global variables.

(define equal-seq?
  (lambda (seq-lis)
    (let loop ((qh qh) (q-len q-len) (lis seq-lis))
      (cond ((zero? q-len) #t)
            ((not (equal? (vref seq qh) (car lis))) #f)
            (else (loop (q++ qh) (- q-len 1) (cdr lis)))))))



;;; steps through the gloabl queue to extract a sequence
;;; in list form

(define get-seq
  (lambda ()
    (let ((state-vec (vector '() '(0) '(1) '(0 0) '(0 1) '(1 0) '(1 1))))
      (let loop ((tail (qt)) (seq-lis (vector-ref state-vec q-state)))
        (if (= qh tail)
            (cons (vector-ref seq tail) seq-lis)
            (loop (q-- tail)
                  (cons (vector-ref seq tail) seq-lis)))))))



;;; Having detected the presence of a cycle--by finding a
;;; a repeated sequence--it is necessary to backtrack to
;;; find the starting point of the cycle, i.e., the first
;;; repeated value. The right way to do this is to set up
;;; to sequences N iterations apart (where N is the known
;;; period of the cycle, and step through them in parallel.
;;; But thatÍs not possible here, because of the use of a
;;; single global variable to hold the sequence. (Let that
;;; be a lesson to me.) The workaround adopted here is to
;;; store a range of sequences in a big vector and then
;;; step through those stored values, looking at pairs N
;;; steps apart. 

(define find-start-of-cycle
  (lambda (chk-seq chk-count period current-count)
    (setup-q chk-seq)
    (let* ((interval (+ (- current-count chk-count) 2))
           (vec (make-vector interval)))
      (let loop ((n 0))
        (if (>= n interval)
            #f
            (begin
              (vset! vec n (get-seq))
              (emil-step)
              (loop (+ n 1)))))
      (let loop ((back 0) (front period))
        (if (equal? (vref vec back)
                    (vref vec front))
            (+ front chk-count)
            (loop (+ back 1) (+ front 1)))))))



;;; This generates the string representation of the starting
;;; sequence thatÍs returned by 'emil-fate'. The main reason
;;; for this conversion is that the string is visually more compact
;;; than the list used internally.

(define compactify
  (lambda (seq-lis)
    (let ((str (make-string (length seq-lis))))
      (let loop ((seq-lis seq-lis) (k 0))
        (if (null? seq-lis)
            str
            (let* ((item (car seq-lis))
                   (chr (case item
                          ((z) #\z)
                          ((i) #\i)
                          ((0) #\0)
                          ((1) #\1)
                          (else #\?))))
              (string-set! str k chr)
              (loop (cdr seq-lis) (+ k 1))))))))




;;; 'emil' is the most basic entry point to program. It calls
;;; 'emil-fate' on a range of starting sequences and prints
;;; all the results returned. The arguments are:
;;;
;;;    'how-many' - sequences to test
;;;    'start-seq' - the first sequence to try
;;;    'max-period' - the interval used for detecting cycles
;;;    'max-run' - the number of steps before the program
;;;                gives up on a sequence

(define emil
  (lambda (start-seq how-many max-period max-run)
    (let loop ((count how-many) (start start-seq))
      (if (zero? count)
          'done
          (begin
            (setup-q start)
            (display (emil-fate max-period max-run))
            (newline)
            (loop (- count 1) (next-seq start)))))))
                            


;;; 'period-search' is generally a more useful entry point
;;; for large-scale computations. It runs essentially the
;;; same search as 'emil' (and accepts the same arguments)
;;; but reports results only when a sequence either fails
;;; to terminate or yields a period that has not been seen
;;; before.

(define period-search
  (lambda (start-seq how-many max-period max-run)
    (let ((periods '()))
      (let loop ((count 1) (seq start-seq))
        (if (> count how-many)
            'done
            (begin
              (setup-q seq)
              (let ((fate (emil-fate max-period max-run)))
                (case (cadr fate)
                  ((over-count over-length) (show count fate))
                  ((cycle) (if (not (memv (cadddr fate) periods))
                               (begin (set! periods 
                                            (cons (cadddr fate) periods))
                                      (show count fate))))))
              (loop (+ count 1) (next-seq seq))))))))


;;; some sample input and output

; (emil '(i i i) 20 100 10000)

; (iii    cycle    12 6 16)
; (iii0   cycle    15 6 16)
; (iii1   cycle    11 6 16)
; (iii00  cycle    10 6 16)
; (iii10  cycle    26 6 22)
; (izzz   cycle    27 6 16)
; (izzz0  dwindle  12 0 14)
; (izzz1  cycle    26 6 16)
; (izzz00 cycle    27 6 16)
; (izzz10 cycle     7 4 15)
; (izzi   cycle    13 6 16)
; (izzi0  cycle    30 6 16)
; (izzi1  cycle     4 4 14)
; (izzi00 dwindle  17 0 15)
; (izzi10 cycle     7 6 16)
; (iziz   cycle    29 6 22)
; (iziz0  dwindle  14 0 14)
; (iziz1  dwindle  16 0 14)
; (iziz00 dwindle  19 0 15)
; (iziz10 dwindle 417 0 56)


; (period-search '(i i i) 100 100 10000)

;  1  (iii     cycle 12  6 16)  
; 10  (izzz10  cycle  7  4 15)  
; 74  (izizi00 cycle  2  2 18)  
; 80  (iziiz10 cycle 84 10 34)