(defpackage :charcode
(:use :cl #+sbcl :sb-gray #-sbcl :gray)
- (:export "MAKE-ENCODER" "MAKE-DECODER" "ENCODE-STRING" "DECODE-STRING"
+ (:export "MAKE-ENCODER" "MAKE-DECODER" "ENCODE-STRING" "DECODE-STRING" "SYSTEM-CHARSET"
"CODING-ERROR"
"MAKE-CODEC-CHARACTER-STREAM"
- "LATIN-1" "LATIN1" "UTF-8" "UTF8"))
+ "ASCII" "LATIN-1" "LATIN1" "UTF-8" "UTF8"))
(in-package :charcode)
;;; General stuff
(defun make-decoder (name)
(the decoder-fun (values (funcall (get name 'make-decoder)))))
-(defun encode-string (string coding)
+(defun system-charset ()
+ ;; XXX: Replace me with something perhaps more sensible.
+ 'utf-8)
+
+(defun encode-string (string &optional (coding (system-charset)))
(declare (type string string))
(let ((encoder (make-encoder coding))
(buf (make-array (list (length string)) :element-type '(unsigned-byte 8) :adjustable t :fill-pointer 0)))
(coding-error string (length string) buf "Encoding of string in ~A ended prematurely." coding))
buf))
-(defun decode-string (buffer coding)
+(defun decode-string (buffer &optional (coding (system-charset)))
(declare (type (array (unsigned-byte 8)) buffer))
(let ((decoder (make-decoder coding))
(buf (make-array (list (length buffer)) :element-type 'character :adjustable t :fill-pointer 0)))
(read-pos :initform 0)
(buffer :initform (make-array '(64) :element-type 'character :adjustable t :fill-pointer 0))))
-(defun make-codec-character-stream (real-stream charset)
+(defun make-codec-character-stream (real-stream &optional (charset (system-charset)))
(declare (type stream real-stream))
(make-instance 'codec-character-stream :decoder (make-decoder charset) :encoder (make-encoder charset) :back real-stream))
(with-slots (decoder back buffer read-pos) stream
(let ((readbuf (make-array (list len) :element-type '(unsigned-byte 8))))
(loop (unless (< (- (length buffer) read-pos) len) (return t))
- (let ((readlen (read-sequence readbuf back)))
+ (let ((readlen (read-sequence readbuf back :end (- len (- (length buffer) read-pos)))))
(when (= readlen 0)
(return-from ccs-ensure-buffer nil))
(funcall decoder readbuf buffer :end readlen))))))
(declare (type codec-character-stream stream))
(with-slots (read-pos buffer) stream
(replace buffer buffer :start2 read-pos)
- (setf (fill-pointer buffer) (- (fill-pointer buffer) read-pos)
- read-pos 0)))
+ (setf (fill-pointer buffer) (- (fill-pointer buffer) read-pos)
+ read-pos 0)))
(defmethod stream-read-char ((stream codec-character-stream))
(unless (ccs-ensure-buffer stream 1)
(adjust-array buffer (list (setf (fill-pointer buffer)
(+ len 16)))))
(replace buffer buffer :start1 16 :end2 len)))
- (setf (aref buffer read-pos) char)
- (decf read-pos)
+ (setf (aref buffer (decf read-pos)) char)
nil))
(defun ccs-wont-hang-p (stream)
(declare (type character char))
(char-code char))
+;;; ASCII
+
+(defun decode-ascii (byteseq charseq &key (start 0) (end (length byteseq)))
+ (declare (type (array (unsigned-byte 8)) byteseq)
+ (type (array character) charseq)
+ (type fixnum start end))
+ (loop
+ (restart-case
+ (loop
+ (unless (< start end) (return-from decode-ascii t))
+ (let ((byte (aref byteseq (prog1 start (incf start)))))
+ (unless (< byte 128)
+ (coding-error byteseq start charseq "Invalid byte ~D in ASCII stream." byte))
+ (vector-push-extend (unicode->char byte) charseq)))
+ (:replace-char (&optional (replacement (unicode->char #xfffd)))
+ :report "Replace the invalid byte with a character."
+ (vector-push-extend replacement charseq))
+ (:skip-char ()
+ :report "Ignore the invalid byte."
+ nil))))
+
+(defun encode-ascii (charseq byteseq &key (start 0) (end (length charseq)))
+ (declare (type (array (unsigned-byte 8)) byteseq)
+ (type (array character) charseq)
+ (type fixnum start end))
+ (loop
+ (restart-case
+ (loop
+ (unless (< start end) (return-from encode-ascii t))
+ (vector-push-extend (let ((cp (char->unicode (aref charseq (prog1 start (incf start))))))
+ (unless (< cp 128)
+ (coding-error charseq start byteseq "ASCII cannot encode code-points higher than 128."))
+ cp)
+ byteseq))
+ (:replace-char (&optional (replacement #\?))
+ :report "Replace this character with another."
+ (vector-push-extend (char->unicode replacement) byteseq))
+ (:skip-char ()
+ :report "Ignore this character."
+ nil))))
+
+(define-decoder (ascii)
+ #'decode-ascii)
+
+(define-encoder (ascii)
+ #'encode-ascii)
+
+(define-codec-synonyms ascii :ascii)
+
;;; Latin-1
(defun decode-latin-1 (byteseq charseq &key (start 0) (end (length byteseq)))
(define-encoder (latin-1)
#'encode-latin-1)
-(define-codec-synonyms latin-1 latin1 iso-8859-1)
+(define-codec-synonyms latin-1 latin1 iso-8859-1 :latin-1 :latin1 :iso-8859-1)
;;; UTF-8
(setf mlen 0))))))))
#'decode)))
-(define-codec-synonyms utf-8 utf8)
+(define-codec-synonyms utf-8 utf8 :utf-8 :utf8)