Google
 

Trailing-Edge - PDP-10 Archives - clisp - clisp/upsala/char.clisp
There are no other files named char.clisp in the archive.
;;; This is a -*-Lisp-*- file.

;;; **********************************************************************
;;; This code was written as part of the Spice Lisp project at
;;; Carnegie-Mellon University, and has been placed in the public domain.
;;; If you want to use this code or any part of Spice Lisp, please contact
;;; Scott Fahlman (FAHLMAN@CMUC). 
;;; **********************************************************************

;;; Character functions for Spice Lisp.  Part of the standard Spice Lisp
;;; environment.

;;; Written by Guy Steele.
;;; Rewritten by David Dill.
;;; Hacked up for speed and currently maintained by Scott Fahlman.

;;; This file assumes the use of ASCII codes and the specific character
;;; formats used in Spice Lisp and Vax Common Lisp.  It is optimized for
;;; performance rather than for portability and elegance.  It will have
;;; to be rewritten for any implementation that uses a significantly
;;; different layout for characters.  In particular, several thing here
;;; assume that the code field is the rightmost after the char is converted
;;; to a fixnum.

;;; *******************************************************************

(in-package 'lisp)

(export '(char-code-limit char-font-limit char-bits-limit standard-char-p
	  graphic-char-p string-char-p alpha-char-p upper-case-p lower-case-p
	  both-case-p digit-char-p alphanumericp char= char/= char< char>
	  char<= char>= char-equal char-not-equal char-lessp char-greaterp
	  char-not-greaterp char-not-lessp character char-code char-bits
	  char-font code-char make-char char-upcase char-downcase digit-char
	  char-int int-char char-name name-char char-control-bit char-meta-bit
	  char-hyper-bit char-super-bit char-bit set-char-bit))

;;; The following are visible to the user.

(defconstant char-code-limit 128
  "The upper exclusive bound on values produced by CHAR-CODE.")
(defconstant char-font-limit 256
  "The upper exclusive bound on values produced by CHAR-FONT.")
(defconstant char-bits-limit 256
  "The upper exclusive bound on values produced by CHAR-BITS.")

(defconstant char-control-bit 1
  "This bit indicates a control character.")
(defconstant char-meta-bit 2
  "This bit indicates a meta character.")
(defconstant char-super-bit 4
  "This bit indicates a super character.")
(defconstant char-hyper-bit 8
  "This bit indicates a hyper character.")

;20; moved init to end, so CODE-CHAR exists

;;; The following macros are for internal use in this file.

;20; get-and-check-int is in kernel

;;; Turn a fixnum into a character.

;20; int-to-char is in kernel
;;; Here begin the actual user-level functions.

(defun standard-char-p (char)
  "The argument must be a character object.  Standard-char-p returns T if the
   argument is a standard character -- one of the 95 ASCII printing characters
   or <return>."
  (let ((n (logand (get-and-check-int char) %character-int-mask)))
    (or (< 31 n 127)
	(= n 13))))


(defun graphic-char-p (char)
  "The argument must be a character object.  Graphic-char-p returns T if the
  argument is a printing character (space through ~ in ASCII), otherwise
  returns ()."
  (< 31 (logand (get-and-check-int char) %character-code-control-mask) 127))


;20; was 256 in SLISP
(defun string-char-p (char)
  "The argument must be a character object.  String-char-p returns T if the
   argument can be stored in a string."
  (< (char-int char) 128))


(defun alpha-char-p (char)
  "The argument must be a character object.  Alpha-char-p returns T if the
   argument is an alphabetic character, A-Z or a-z; otherwise ()."
  (let ((m (logand (get-and-check-int char) %character-code-control-mask)))
    (or (< 64 m 91) (< 96 m 123))))


(defun upper-case-p (char)
  "The argument must be a character object; upper-case-p returns T if the
   argument is an upper-case character, () otherwise."
  (< 64 (logand (get-and-check-int char) %character-code-control-mask) 91))


(defun lower-case-p (char)
  "The argument must be a character object; lower-case-p returns T if the 
   argument is a lower-case character, () otherwise."
  (< 96 (logand (get-and-check-int char) %character-code-control-mask) 123))


(defun both-case-p (char)
  "The argument must be a character object.  Both-case-p returns T if the
  argument is an alphabetic character and if the character exists in
  both upper and lower case.  For ASCII, this is the same as Alpha-char-p."
  (let ((m (logand (get-and-check-int char)
		   %character-code-control-mask)))
    (or (< 64 m 91) (< 96 m 123))))


(defun digit-char-p (char &optional (radix 10.))
  "If char is a digit in the specified radix, returns the fixnum for
  which that digit stands, else returns NIL.  Radix defaults to 10
  (decimal)."
  (let ((m (- (logand (get-and-check-int char)
		      %character-code-control-mask)
	      48)))
    (cond ((<= radix 10.)
	   ;; Special-case decimal and smaller radices.
	   (if (and (>= m 0) (< m radix))  m  nil))
	  ;; Cannot handle radix past Z.
	  ((> radix 36)
	   (error "~S too large to be an input radix."  radix))
	  ;; Digits 0 - 9 are used as is, since radix is larger.
	  ((and (>= m 0) (< m 10)) m)
	  ;; Check for upper case A - Z.
	  ((and (>= (setq m (- m 7)) 10) (< m radix)) m)
	  ;; Also check lower case a - z.
	  ((and (>= (setq m (- m 32)) 10) (< m radix)) m)
	  ;; Else, fail.
	  (t nil))))

(defun digit-char (weight &optional (radix 10.) (font 0))
  "Converts a number into a character object, e.g. 1 into #\1.
   You can specify the radix (default decimal) and font bits (default
   0) as options.  Returns NIL if request is impossible."
    (cond ;; validate args
	  ((or (< weight 0) (>= weight radix) (> radix 36)) nil)
	  ;; Digits 0 - 9 are used as is
	  ((< weight 10) (code-char (+ weight 48) 0 font))
	  ;; Else the usual character rep
	  (t (code-char (+ weight 55) 0 font))))

(defun alphanumericp (char)
  "Given a character-object argument, alphanumericp returns T if the
   argument is either numeric or alphabetic."
  (let ((m (logand (get-and-check-int char)
		   %character-code-control-mask)))
    (or (< 47 m 58) (< 64 m 91) (< 96 m 123))))


(defun char= (character &rest more-characters)
  "Returns T if all of its arguments are the same character."
  (do ((clist more-characters (cdr clist)))
      ((atom clist) T)
    (unless (eq (car clist) character) (return nil))))


(defun char/= (character &rest more-characters)
  "Returns T if no two of its arguments are the same character."
  (do* ((head character (car list))
	(list more-characters (cdr list)))
       ((atom list) T)
    (unless (do* ((l list (cdr l)))                  ;inner loop returns T 
		 ((atom l) T)			     ; iff head /= rest.
	      (if (eq head (car l)) (return nil)))
      (return nil))))


(defun char< (character &rest more-characters)
  "Returns T if its arguments are in strictly increasing alphabetic order."
  (do* ((c character (car list))
	(list more-characters (cdr list)))
       ((atom list) T)
    (unless (< (char-int c) (char-int (car list))) (return nil))))


(defun char> (character &rest more-characters)
  "Returns T if its arguments are in strictly decreasing alphabetic order."
  (do* ((c character (car list))
	(list more-characters (cdr list)))
       ((atom list) T)
    (unless (> (char-int c) (char-int (car list))) (return nil))))


(defun char<= (character &rest more-characters)
  "Returns T if its arguments are in strictly non-decreasing alphabetic order."
  (do* ((c character (car list))
	(list more-characters (cdr list)))
       ((atom list) T)
    (unless (<= (char-int c) (char-int (car list))) (return nil))))


(defun char>= (character &rest more-characters)
  "Returns T if its arguments are in strictly non-increasing alphabetic order."
  (do* ((c character (car list))
	(list more-characters (cdr list)))
       ((atom list) T)
    (unless (>= (char-int c) (char-int (car list))) (return nil))))



;;; Equal-char-int is used by the following functions as a version of char-int
;;;  which loses font, bits, and case info.

;20; equal-char-int is in kernel

(defun char-equal (character &rest more-characters)
  "Returns T if all of its arguments are the same character.
  Font, bits, and case are ignored."
  (do ((clist more-characters (cdr clist)))
      ((atom clist) T)
    (unless (= (equal-char-int (car clist))
	       (equal-char-int character))
      (return nil))))


(defun char-not-equal (character &rest more-characters)
  "Returns T if no two of its arguments are the same character.
   Font, bits, and case are ignored."
  (do* ((head character (car list))
	(list more-characters (cdr list)))
       ((atom list) T)
    (unless (do* ((l list (cdr l)))
		 ((atom l) T)
	      (if (= (equal-char-int head) (equal-char-int (car l)))
		  (return nil)))
      (return nil))))


(defun char-lessp (character &rest more-characters)
  "Returns T if its arguments are in strictly increasing alphabetic order.
   Font, bits, and case are ignored."
  (do* ((c character (car list))
	(list more-characters (cdr list)))
       ((atom list) T)
    (unless (< (equal-char-int c) (equal-char-int (car list))) (return nil))))


(defun char-greaterp (character &rest more-characters)
  "Returns T if its arguments are in strictly decreasing alphabetic order.
   Font, bits, and case are ignored."
  (do* ((c character (car list))
	(list more-characters (cdr list)))
       ((atom list) T)
    (unless (> (equal-char-int c) (equal-char-int (car list))) (return nil))))


(defun char-not-greaterp (character &rest more-characters)
  "Returns T if its arguments are in strictly non-decreasing alphabetic order.
   Font, bits, and case are ignored."
  (do* ((c character (car list))
	(list more-characters (cdr list)))
       ((atom list) T)
    (unless (<= (equal-char-int c) (equal-char-int (car list))) (return nil))))


(defun char-not-lessp (character &rest more-characters)
  "Returns T if its arguments are in strictly non-increasing alphabetic order.
   Font, bits, and case are ignored."
  (do* ((c character (car list))
	(list more-characters (cdr list)))
       ((atom list) T)
    (unless (>= (equal-char-int c) (equal-char-int (car list))) (return nil))))


(defun character (object)
  "Coerces its argument into a character object if possible.  Accepts
  characters, strings and symbols of length 1, and integers."
  (typecase object
    (character object)
    (integer (int-char object))
    (string (if (= 1 (length object))
		(elt object 0)
		(error "~S is of length >1, can't coerce into a character."
		       object)))
    (symbol (if (= 1 (length (symbol-name object)))
		(elt (symbol-name object) 0) 
		(error "~S is of length >1, can't coerce into a character."
		       object)))
    (otherwise (error "~S can't be coerced into a character." object))))

(defun char-code (char)
  "Given a character object argument, char-code returns the code attribute
   of that object as a non-negative integer."
  (logand (get-and-check-int char) %character-code-mask))


(defun char-bits (char)
  "Given a character object argument, char-code returns the bits attribute
   of that object as a non-negative integer."
  (ldb %character-control-byte (get-and-check-int char)))


(defun char-font (char)
  "Given a character object argument, char-code returns the font attribute
   of that object as a non-negative integer."
  (ldb %character-font-byte (get-and-check-int char)))


(defun code-char (code &optional (bits 0) (font 0))
  "All three arguments, must be non-negative integers; the last two are 
   optional with default values of 0 (for the bits and font attributes).
   Returns a character object with the specified code, bits, and font,
   or returns NIL if this is not possible."
  (cond ((not (< -1 code char-code-limit)) nil)
	((and (zerop bits) (zerop font) (int-char code)))
	(t (and (< -1 bits char-bits-limit)
		(< -1 font char-font-limit)
		(int-char (dpb font %character-font-byte
			       (dpb bits %character-control-byte
				    code)))))))


(defun make-char (char &optional (bits 0) (font 0))
  "Replaces the bits and font attributes of the specified character with
  those supplied by the user as fixnums.  Bits and font both default to 0."
  (and (< -1 bits char-bits-limit)
       (< -1 font char-font-limit)
       (int-to-char (dpb font %character-font-byte
			 (dpb bits %character-control-byte
			      (logand (get-and-check-int char)
				      %character-code-mask))))))

(defun char-upcase (char)
  "Returns a character with the same bits and font as the input character,
  converted to upper-case if that is possible."
  (let ((n (get-and-check-int char)))
    (cond ((< 96 (logand n %character-code-control-mask) 123)
	   (int-to-char (- n 32)))
	  (t char))))


(defun char-downcase (char)
  "Returns a character with the same bits and font as the input character,
  converted to lower-case if that is possible."
  (let ((n (get-and-check-int char)))
    (cond ((< 64 (logand n %character-code-control-mask) 91)
	   (int-to-char (+ n 32)))
	  (t char))))


(defun digit-weight (weight &optional (radix 10) (bits 0) (font 0))
  "All arguments must be integers.  Returns a character object that
  represents a digit of the given weight in the specified radix.  The
  character will have the specified bits and font attributes."
  (and (< weight radix) (< weight 36)
       (code-char (if (< weight 10) (+ 48 weight) (+ 55 weight))
		  bits font)))


(defun char-int (ch)
  "The argument must be a character-object.  Returns the font, bits, and
  code fields as a single non-negative integer.  Implementation dependent.
  Used mostly for hashing."
  (logand (get-and-check-int ch) %character-int-mask))


(defun int-char (n)
  "Performs the inverse of char-int.  The argument must be a non-negative
  integer of the appropriate size.  It is turned into a character object."
  (cond ((and (fixnump n) (<= 0 n %character-int-mask))
	 (int-to-char n))
	(t nil)))


(defun char-name (char)
  "Given a character object, char-name returns the name for that
  object (a symbol)."
  (car (rassoc char char-name-alist :test #'char=)))

(defun name-char (sym)
  "Given a symbol as argument, name-char returns a character object whose
   name is that symbol, if one exists.  Otherwise, () is returned."
  (let ((name (string sym))) 
    (cdr (assoc name char-name-alist :test #'string-equal))))

(defun char-bit (char name)
  "Returns T if the named bit is set in character object CHAR.  Else,
  returns NIL.  Legal names are :CONTROL, :META, :HYPER, and :SUPER."
  (logtest (case name
	     (:control char-control-bit)
	     (:meta char-meta-bit)
	     (:hyper char-hyper-bit)
	     (:super char-super-bit))
	   (char-bits char)))


(defun set-char-bit (char name newvalue)
  "Returns a character just like CHAR except that the named bit is
  set or cleared, according to whether NEWVALUE is non-null or NIL.
  Legal bit names are :CONTROL, :META, :HYPER, and :SUPER."
  (let ((bit (case name
	      (:control char-control-bit)
	      (:meta char-meta-bit)
	      (:hyper char-hyper-bit)
	      (:super char-super-bit)
	      (t 0))))
    (code-char (char-code char)
	       (if newvalue
		   (logior bit (char-bits char))
		   (logand (lognot bit) (char-bits char)))
	       (char-font char))))

;20; I have commented out the ASCII names for the moment, since we
;20;  are currently searching this.  When we make it a hash table
;20;  they should be put back in.

(defvar char-name-alist
  `(("NULL" . ,(code-char 0)) ("^@" . ,(code-char 0))
    ("^A" . ,(code-char 1)) ; ("SOH" . ,(code-char 1))
    ("^B" . ,(code-char 2)) ; ("STX" . ,(code-char 2))
    ("^C" . ,(code-char 3)) ; ("ETX" . ,(code-char 3))
    ("^D" . ,(code-char 4)) ; ("EOT" . ,(code-char 4))
    ("^E" . ,(code-char 5)) ; ("ENQ" . ,(code-char 5))
    ("^F" . ,(code-char 6)) ; ("ACK" . ,(code-char 6))
    ("BELL" . ,(code-char 7)) ("^G" . ,(code-char 7))
    ("BACKSPACE" . ,(code-char 8)) ("BS" . ,(code-char 8))
	("^H" . ,(code-char 8))
    ("TAB" . ,(code-char 9)) ("^I" . ,(code-char 9))
    ("NEWLINE" . ,(code-char 10)) ("NL" . ,(code-char 10))
	("LINEFEED" . ,(code-char 10))  ("^J" . ,(code-char 10))
    ("^K" . ,(code-char 11)) ; ("VT" . ,(code-char 11))
    ("PAGE" . ,(code-char 12)) ("FF" . ,(code-char 12))
        ("FORMFEED" . ,(code-char 12)) ("FORM" . ,(code-char 12))
	("^L" . ,(code-char 12))
    ("RETURN" . ,(code-char 13)) ("^M" . ,(code-char 13))
	("CR" . ,(code-char 13))
    ("^N" . ,(code-char 14)) ; ("SO" . ,(code-char 14))
    ("^O" . ,(code-char 15)) ; ("SI" . ,(code-char 15))
    ("^P" . ,(code-char 16)) ; ("DLE" . ,(code-char 16))
    ("^Q" . ,(code-char 17)) ; ("DC1" . ,(code-char 17))
;	("XON" . ,(code-char 17))
    ("^R" . ,(code-char 18)) ; ("DC2" . ,(code-char 18))
    ("^S" . ,(code-char 19)) ; ("DC3" . ,(code-char 19))
;	("XOFF" . ,(code-char 19))
    ("^T" . ,(code-char 20)) ; ("DC4" . ,(code-char 20))
    ("^U" . ,(code-char 21)) ; ("NAK" . ,(code-char 21))
    ("^V" . ,(code-char 22)) ; ("SYN" . ,(code-char 22))
    ("^W" . ,(code-char 23)) ; ("ETB" . ,(code-char 23))
    ("^X" . ,(code-char 24)) ; ("CAN" . ,(code-char 24))
    ("^Y" . ,(code-char 25)) ; ("EM" . ,(code-char 25))
    ("^Z" . ,(code-char 26)) ; ("SUB" . ,(code-char 26))
    ("ESCAPE" . ,(code-char 27)) ("ESC" . ,(code-char 27))
    	("ALTMODE" . ,(code-char 27)) ("ALT" . ,(code-char 27))
	("^[" . ,(code-char 27))
    ("^\\" . ,(code-char 28)) ; ("FS" . ,(code-char 28))
    ("^]" . ,(code-char 29)) ; ("GS" . ,(code-char 29))
    ("^^" . ,(code-char 30)) ; ("RS" . ,(code-char 30))
    ("^_" . ,(code-char 31)) ; ("US" . ,(code-char 31))
    ("SPACE" . ,(code-char 32)) ("SP" . ,(code-char 32))
    ("RUBOUT" . ,(code-char 127)) ("DELETE" . ,(code-char 127))
	("^?" . ,(code-char 127)))
  "This is the alist of (character-name . character) for characters
  with long names.  The first name in this list for a given character
  is used on typeout and is the preferred form for input.")