Google
 

Trailing-Edge - PDP-10 Archives - SRI_NIC_PERM_SRC_3_19910112 - utilities/pack.sai
There are no other files named pack.sai in the archive.
begin "PACK"

comment This program was developed on WAITS (by DON) and adapted for use on T(W)ENEX (by
	JLS and DON).  Where differences between the two versions are required, the
	following macros are used.  Note that JFNS is pre-declared only in T(W)ENEX SAIL.;

require "{~<>" delimiters;

define	TENEX = declaration(jfns),
	!!TENEXONLY = {ifc tenex thenc~,
	!!WAITSONLY = {ifc not tenex thenc~,
	!!ENDTENEX = {endc~,
	!!ENDWAITS = {endc~,
	!! = {~;

!!WAITSONLY
!!	require "  (Compiling WAITS version)  " message;
!!ENDWAITS;

!!TENEXONLY
!!	require "  (Compiling TENEX version)  " message;
!!	require "G15000S" compilerswitches;
!!ENDTENEX;

define	 = {begin~,
	 = {end~,
	thru = {step 1 until~,
	repeat = {do~,
	 = {'15&'12~,
	! = {comment~;

! This program compresses text & binary files.  Unlike "CRUNCH" (CRU2 & SPINDL), the
  compression is on a character-by-character basis.  The file is scanned twice: once
  to determine the character frequencies so that an optimum encoding may be
  computed, then again to perform the encoding.  About 40 to 50% of the file size
  may be eliminated by judicious choice as to which characters should be represented
  by the fewest bits.  The program can also be used to perform the reverse
  operation, which takes a single pass.;

define	MX = {12~;

! MX is the maximum number of bits which will be used to represent any char, < 18.
  Some related pieces of info follow.;

define	MXBIT = {(1 lsh (MX-1))~,
	ONES = {((MXBIT lsh 1) - 1)~;

integer array TALLY, ORDER, NBITS, PATTERN[0:'777], INFO[1:6], FDB[0:30],
	FIRST, LAST[1:MX], BUF[0:'377], TABLE, SKIP[0:ONES];
integer BRK, EOF, FULLWORD, MODE, X, Y, WORDPTR, BITPTR, CHAR, TOTBITS, THISWORD, NEXTWORD,
	BYTESIZE, MAXCHAR, BITSWRITTEN, PACKCHAN, UNPKCHAN;
boolean VERBOSE, CHECKSUM, NONTEXT;
string S;
comment BLATHER;

procedure BLATHER (integer WHAT; string MSG (null));

! This routine produces some of the verbose-mode output.;

if VERBOSE then case WHAT of 

	print (, "Character tally:", );
	for X _ 0 thru MAXCHAR do
		print (TALLY[X], (if X land 7 = 7 then  else '11))
;

	print (, MSG, " bit assignments:", );
	for X _ 1 thru MX do
		if LAST[X]  FIRST[X] then
			print (X, ": ");
			for Y _ FIRST[X] thru LAST[X] do
				if NONTEXT then
					print ("'",cvos(ORDER[Y])," ")
				else
					case ORDER[Y] of 
					[0]	print ("<CRLF>");
					['11]	print ("<TAB>");
					['12]	print ("<LF>");
					['13]	print ("<VT>");
					['14]	print ("<FF>");
					['15]	print ("<CR>");
					['40]	print ("<SP>");
					['175]	print ("<ALT>");
					['177]	print ("<BS>");
					else	print (ORDER[Y]&null)
					;
			print ()
		;
	print ()
;	

;
comment OUTBITS, INBITS;

procedure OUTBITS (integer VAL, SIZE);

! Pack SIZE bits from left of VAL into BUF.  WORDPTR is number of words of
  BUF filled so far, and BITPTR is number of bits used in current word.
  BITSWRITTEN keeps track of actual amount of output--if it differs from
  TOTBITS something's dreadfully wrong (probably missing-buffer-bug).;

	BUF[WORDPTR] _ BUF[WORDPTR] lor (VAL lsh -BITPTR);
	BITPTR _ BITPTR + SIZE;
	if BITPTR  36 then
		CHECKSUM _ CHECKSUM xor BUF[WORDPTR];
		WORDPTR _ WORDPTR + 1;
		if WORDPTR = '400 then	! Flush full buffer;
			arryout (PACKCHAN, BUF[0], '400);
			arrclr (BUF);
			WORDPTR _ 0
		;
		BITPTR _ BITPTR - 36;
		BUF[WORDPTR] _ VAL lsh (SIZE-BITPTR)
	;
	BITSWRITTEN _ BITSWRITTEN + SIZE
;

integer procedure INBITS (integer SIZE; boolean SKIP (true));

! Unpack SIZE bits from THISWORD and NEXTWORD, returning them right-justified.
  Assume BITPTR bits have been used out of THISWORD.  If SKIP, then advance the
  BITPTR over the bits used and decrement TOTBITS.;

	integer VAL;

	if BITPTR  36 then
		BITPTR _ BITPTR - 36;
		THISWORD _ NEXTWORD;
		CHECKSUM _ CHECKSUM xor (NEXTWORD _ wordin (PACKCHAN))
	;
	VAL _ (THISWORD lsh BITPTR) lor (NEXTWORD lsh (BITPTR-36));
	if SKIP then
		BITPTR _ BITPTR + SIZE;
		TOTBITS _ TOTBITS - SIZE
	;
	return (VAL lsh (SIZE-36))
;
comment READSOME, READCHAR, READFRONT, READNAME;

string procedure READSOME;

! Read in a hunk of file UNPKCHAN, up to 1000 chars.  If line number encountered, it's
  not a text file unless MODE0 and char after number is a tab.  If MODE still zero,
  set it based on whether file starts with line number.;

	string INP, LNO;

	INP _ input (UNPKCHAN, 1);
	if BRK < 0 then
		if MODE > 0 or length (LNO _ input (UNPKCHAN, 2))  5 then
			if equ (LNO, "     "&'15&'15) and BRK = '14 then
				INP _ INP & '14
			else
				NONTEXT _ EOF _ true;
				INP _ S _ null
			
		else if MODE = 0 then
			MODE _ -1;
			print ("(Ignoring SOS line numbers)", )
		
	
	else if MODE = 0 then
		MODE _ 1;
	return (INP)
;

integer procedure READCHAR;

! Lop char from front of S and return it, being sure to leave S non-null unless at
  EOF.  Detect crlf as special case, returning 0.  (Real nulls are discarded.)  If
  not text, return next 9-bit byte out of FULLWORD.;

	integer CH;

	if NONTEXT then
		CH _ (FULLWORD _ FULLWORD rot 9) land '777;
		if (BRK _ BRK + 1) = 4 then
			BRK _ 0;
			FULLWORD _ wordin (UNPKCHAN)
		;
		return (CH)
	;
	while length(S) < 3 and not EOF do
		S _ S & readsome;	! Make sure at least 3 chars, if possible;
	if (CH _ lop(S))  '15 or S[1 for 1]  '12 then
		return (CH);
	return (0*lop(S))
;	

procedure READFRONT (boolean TELL);

! Set up to read text file on channel UNPKCHAN.  Skip ETV directory if present,
  reporting the fact to varying degrees depending on TELL and VERBOSE.  Leave at
  least one char in S unless file is empty, since S=null is EOF indicator.  If we
  already know it's not text, read first word into FULLWORD and set byte count.;

	if NONTEXT then
		BRK _ 0;
		FULLWORD _ wordin (UNPKCHAN)
	
	else
		S _ readsome;
		while length (S) < 24 and not EOF do
			S _ S & readsome;
		if equ (S[1 for 10] & S[13 for 6] & S[24 for 6], "COMMENT  VALID  PAGES") then
			if TELL then
				print ("(Discarding ETV directory)", )
			else if VERBOSE then
				print ("(Ignoring ETV directory)", );
			while length(S) do
				if readchar = '14 and length(S) then
					return;
			print ("File contains nothing but ETV directory (which is ignored).", )
		
	;

boolean procedure READNAME (reference string NM);

! Reads a file name frm the terminal into NM, returns TRUE and types "aborted" if
  the input is null.;

	if length (NM _ inchwl) > 0 then return (false);
	print ("...aborted.", );
	return (true)
;
comment TWEAK;

procedure TWEAK;

! This routine is used by PACK to tweak the assignment for better packing.  There
  are two kinds of tweak.  [1] If, for some i, the least frequent (i-1)-bit char is
  less frequent than the 2^k most frequent (i+k)-bit chars, then it pays to use one
  less bit for each of those (i+k)-bit chars and one more bit for the (i-1)-bit
  char.  [2] If, for some i, the most frequent i-bit char is more frequent than the
  two least frequent ones, then it pays to use one less bit for the former and one
  more for the latter.  Tweaks of type 1 are done even if they only break even,
  since they may cut down on the number of different bit-lengths used (saving 7 bits
  at the front of the packed file) and because it keeps the process alive a bit
  longer, so we might find more tweaks.;

while true do
	integer I, K, N, BEST, HOW, SAVE;
	BEST _ HOW _ 0;
	for I _ 2 thru MX-1 do
		if LAST[I-1]  FIRST[I-1] then
			for K _ 1 thru MX-I do
				if LAST[I+K]  FIRST[I+K] + (ONES lsh (K-MX)) then
					SAVE _ TALLY[ORDER[LAST[I-1]]];
					for N _ 1 thru (1 lsh K) do
						SAVE _ SAVE - TALLY[ORDER[FIRST[I+K]+N-1]];
					if SAVE  BEST then
						BEST _ SAVE;
						HOW _ (K lsh 18) + I
					
				;
		if LAST[I]  FIRST[I]+2 then
			SAVE _ TALLY[ORDER[LAST[I]]] + TALLY[ORDER[LAST[I]-1]]
				- TALLY[ORDER[FIRST[I]]];
			if SAVE < BEST then
				BEST _ SAVE;
				HOW _ I
			
		
	;
	if HOW lsh -18 then
		I _ HOW land '777777;
		K _ HOW lsh -18;
		if VERBOSE then
			print ("Tweaking ", I-1, " and ", I+K, " inward saves ", -BEST, );
		NBITS[ORDER[LAST[I-1]]] _ NBITS[ORDER[LAST[I-1]]] + 1;
		LAST[I-1] _ LAST[I-1] - 1;
		FIRST[I] _ FIRST[I] - 1;
		LAST[I+K-1] _ LAST[I+K-1] + (1 lsh K);
		FIRST[I+K] _ FIRST[I+K] + (1 lsh K);
		for N _ 1 thru (1 lsh K) do
			NBITS[ORDER[FIRST[I+K]-N]] _ NBITS[ORDER[FIRST[I+K]-N]] - 1
	
	else if HOW then
		if VERBOSE then
			print ("Tweaking ", HOW, " outward saves ", -BEST, );
		LAST[HOW-1] _ LAST[HOW-1] + 1;
		NBITS[ORDER[FIRST[HOW]]] _ NBITS[ORDER[FIRST[HOW]]] - 1;
		NBITS[ORDER[LAST[HOW]]] _ NBITS[ORDER[LAST[HOW]]] + 1;
		NBITS[ORDER[LAST[HOW]-1]] _ NBITS[ORDER[LAST[HOW]-1]] + 1;
		FIRST[HOW] _ FIRST[HOW] + 1;
		LAST[HOW] _ LAST[HOW] - 2;
		FIRST[HOW+1] _ FIRST[HOW+1] - 2
	
	else
		done
;
comment PACK;

procedure PACK;
	integer TOTAL, CH, POSN, NUMDIFF, NEW, OLD, USED, FREQ, BIT;
	string UNPKNM, PACKNM;

	print ("ack...", , "Text file: ");
!!WAITSONLY
!!	open (UNPKCHAN_getchan, "dsk", 0, 19, 0, 1000, BRK, EOF_0);
!!	repeat
!!		if readname (UNPKNM) then
!!			release (UNPKCHAN);
!!			return
!!		;
!!		lookup (UNPKCHAN, UNPKNM, EOF);
!!		if EOF then
!!			print ("Can't find it, try again: ")
!!	 until not EOF;
!!	fileinfo (INFO);
!!	OLD _ - (INFO[4] rot 18);	! Original file size;
!!ENDWAITS
!!TENEXONLY
!!	UNPKNM _ jfns (UNPKCHAN _ openfile (null, "ROC"), 0);
!!	setinput (UNPKCHAN, 1000, BRK, EOF_0);
!!	swdptr (UNPKCHAN, -1);		! Point to end of file;
!!	OLD _ rwdptr (UNPKCHAN);	! Get value;
!!	swdptr (UNPKCHAN, 0);		! Back to beginning of file;
!!ENDTENEX
	if OLD = 0 then
		print ("Can't pack zero-word file!", );
		release (UNPKCHAN);
		return
	;

	print ("Packed output file: ");
!!WAITSONLY
!!	open (PACKCHAN_getchan, "dsk", 8, 0, 19, EOF, EOF, EOF);
!!	repeat
!!		if readname (PACKNM) then
!!			release (PACKCHAN);
!!			release (UNPKCHAN);
!!			return
!!		;
!!		enter (PACKCHAN, PACKNM, EOF);
!!		if EOF then
!!			print ("Can't enter file, try again: ")
!!	 until not EOF;
!!ENDWAITS
!!TENEXONLY
!!	PACKCHAN _ openfile (null, "WC");
!!ENDTENEX

	! First step: count number of occurrences of each character in input file;

	setbreak (1, null, null, "LI");
	setbreak (2, '11&'14, null, "I");
	MODE _ 0;		! -1 for line numbers, +1 for no line numbers;
	NONTEXT _ false;
	arrclr (TALLY);
	readfront (false);	! Set up to read file, maybe skipping directory;
	while length(S) do	! This is done here to make sure about NONTEXT;
		TALLY[CH _ readchar] _ TALLY[CH] + 1;
	if NONTEXT then
		print ("Invalid format for text file.", , "Type Y to pack using 9-bit bytes. ");
		if inchrw lor '40  "y" then
			release (PACKCHAN);
			release (UNPKCHAN);
			return
		;
		print ();
!!WAITSONLY
!!		release (UNPKCHAN);	! Need to reopen it in binary mode;
!!		open (UNPKCHAN_getchan, "dsk", 8, 19, 0, EOF, EOF, EOF_0);
!!		lookup (UNPKCHAN, UNPKNM, EOF);
!!ENDWAITS
!!TENEXONLY
!!		release (UNPKCHAN);	! Can't just swdptr to 0 since SAIL then loses first 1-bit in bit 35;
!!		UNPKCHAN _ openfile (UNPKNM, "RO");
!!		setinput (UNPKCHAN, 1000, BRK, EOF_0);
!!ENDTENEX
		arrclr (TALLY);
		readfront (false);
		while not EOF do
			TALLY[CH _ readchar] _ TALLY[CH] + 1;
		BYTESIZE _ 9;
		MAXCHAR _ '777
	
	else
		BYTESIZE _ 7;
		MAXCHAR _ '177
	;
	blather (0);

	! Next, sort the chars by frequency and accumulate total;

	TOTAL _ NUMDIFF _ 0;
	for CH _ 0 thru MAXCHAR do
		if TALLY[CH] then
			POSN _ NUMDIFF _ NUMDIFF + 1;
			while (POSN _ POSN-1) > 0 and TALLY[ORDER[POSN-1]]<TALLY[CH] do
				ORDER[POSN] _ ORDER[POSN-1];	! Simple bubble;
			TOTAL _ TOTAL + TALLY[ORDER[POSN] _ CH]
		;

	! Special case for fewer than 2 distinct chars.;

	if NUMDIFF < 2 then
		if VERBOSE then
			print (, "Special case for <2 different characters.", );
		print ("From ", OLD, " words to 1 (saving ",
			cvf((OLD-1)/OLD * 100), "%)", );
		wordout (PACKCHAN, ((if NONTEXT then -TOTAL else TOTAL) lsh 7) + ORDER[0]);
		! Note that, if 9-bit bytes, bottom two bits of TOTAL are zeros.;
		release (PACKCHAN);
		release (UNPKCHAN);
		return
	;

	! Compute initial allocation of bit combinations.  Number of bits is
	  proportional to negative lg of proportion of char in file, rounded down.
	  If this would not leave enough combinations for the remaining chars, then
	  more bits (hence fewer comb's) are allocated for this char.  Actually,
	  this might fail to allocate all comb's (e.g. if only 2 chars with freq =
	  1000 to 1).  So allocate k comb's where k = fc/t rounded up to power of 2,
	  f = freq of char, c = number of unused comb's, t = total freq of remaining
	  chars.  No chars are allocated MX bits at this stage, so total comb's =
	  2^(MX-1) = MXBIT.  Note that since chars are taken in decr order of freq,
	  fc/t  1.;

	USED _ 0;
	for POSN _ 0 thru NUMDIFF-1 do
		! Find k by shifting t (in CH) until  fc, subtracting one bit per shift;
		CH _ TOTAL;	BIT _ MX-1;
		while CH < TALLY[ORDER[POSN]] * (MXBIT-USED) do
			CH _ CH lsh 1;
			BIT _ BIT - 1
		;
		while USED + (MXBIT lsh -BIT) + (NUMDIFF-POSN) > (MXBIT+1) do
			BIT _ BIT + 1;
		NBITS[ORDER[POSN]] _ BIT;
		TOTAL _ TOTAL - TALLY[ORDER[POSN]];
		USED _ USED + (MXBIT lsh -BIT)
	;

	! Find first/last chars in ORDER which were assigned each number of bits;

	FIRST[1] _ POSN _ 0;
	for BIT _ 1 thru MX-1 do
		while POSN < NUMDIFF and NBITS[ORDER[POSN]]  BIT do
			POSN _ POSN + 1;
		LAST[BIT] _ POSN - 1;
		FIRST[BIT+1] _ POSN
	;
	LAST[MX] _ NUMDIFF - 1;
	blather (1, "Initial");

	! Tweak the assignment to optimise it, then compute compression factor.
	  It's probably high time to describe the format of the packed file.  The
	  first word contains (TOTBITS lsh 18)+USED.  TOTBITS is the total number of
	  bits of packed data, and is used to determine when the end of the packed
	  file has been reached when unpacking.  USED has bit (1 lsh (k-1)) set iff
	  there are k-bit chars.  Bit (1 lsh 17) is set if 9-bit bytes are being
	  used.  Next, for each k which does have k-bit chars, there is a 7-bit (or
	  9-bit) count saying how many such chars, followed by that many 7- or 9-bit
	  chars.  Then comes the packed data, and finally a checksum at the end so
	  the unpacker can (a) always snarf as many bits as it wants without
	  worrying about overshooting eof, and (b) detect whether there was a bit
	  wrong in the packed file.;

	tweak;
	blather (1, "Final");
	TOTBITS _ USED _ 0;
	for POSN _ 0 thru NUMDIFF-1 do
		TOTBITS _ TOTBITS + TALLY[ORDER[POSN]] * NBITS[ORDER[POSN]];
	for BIT _ 1 thru MX do
		if LAST[BIT]  FIRST[BIT] then
			USED _ USED lor (1 lsh (BIT-1));
			TOTBITS _ TOTBITS + BYTESIZE
		;
	if NONTEXT then
		USED _ USED lor (1 lsh 17);
	TOTBITS _ TOTBITS + NUMDIFF*BYTESIZE + 72;
	NEW _ (TOTBITS + 35) % 36;
	print ("From ", OLD, " words to ", NEW, " (saving ",
		cvf((OLD-NEW)/OLD * 100), "%)", );

	! Put out header stuff into packed file.  Simultaneously compute the bit
	  pattern which will be used for each char.;

	arrclr (BUF);
	BITSWRITTEN _ CH _ CHECKSUM _ WORDPTR _ BITPTR _ 0;
	outbits ((TOTBITS lsh 18) lor USED, 36);
	for BIT _ 1 thru MX do
		if LAST[BIT]  FIRST[BIT] then
			outbits ((LAST[BIT]-FIRST[BIT]+1) lsh (36-BYTESIZE), BYTESIZE);
			for POSN _ FIRST[BIT] thru LAST[BIT] do
				outbits (ORDER[POSN] lsh (36-BYTESIZE), BYTESIZE);
				PATTERN[ORDER[POSN]] _ (CH lsh (36-MX));
				CH _ CH + (MXBIT lsh (1-BIT))
			
		;

	! And here we go!  Rewind the file and start packing.;

!!WAITSONLY
!!	useti (UNPKCHAN, 1);
!!ENDWAITS
!!TENEXONLY
!!	swdptr (UNPKCHAN, 0);	! For some obscure reason useti loses;
!!ENDTENEX
	readfront (true);
	while (if NONTEXT then (not EOF) else length(S)) do
		CH _ readchar;
		outbits (PATTERN[CH], NBITS[CH])
	;

	! Close it up;

	if BITSWRITTEN  TOTBITS-36 then
		print ("File different on second pass.  Suggest you try again.", );
	if BITPTR  0 then
		outbits (0, 36-BITPTR);
	arryout (PACKCHAN, BUF[0], WORDPTR);
	wordout (PACKCHAN, CHECKSUM);
	release (PACKCHAN);	! Have to release (not just close) so that;
	release (UNPKCHAN)	! subsequent operations can getchan same chans;
;
comment UNPACK;

procedure UNPACK;
	integer BIT, USED, CODE, I, J, CH, HOWMANY, SIZE, OSIZE;
	string NM;

	boolean procedure ASKOUT;
		if NONTEXT then
			print ("Binary output file: ")
		else
			print ("Text output file: ");
!!WAITSONLY
!!		open (UNPKCHAN_getchan, "dsk", (if NONTEXT then 8 else 0), 0, 19, EOF, EOF, EOF_0);
!!		repeat
!!			if readname (NM) then
!!				release (UNPKCHAN);
!!				return (true)
!!			;
!!			enter (UNPKCHAN, NM, EOF);
!!			if EOF then
!!				print ("Can't enter file, try again: ")
!!		 until not EOF;
!!		return (false)
!!ENDWAITS
!!TENEXONLY
!!		UNPKCHAN _ openfile (null, "WC");
!!		if EOF _ (UNPKCHAN = -1) then
!!			print ("Can't enter file.", );
!!		return (EOF)
!!ENDTENEX
	;

	print ("npack...", , "Packed file: ");
!!WAITSONLY
!!	open (PACKCHAN_getchan, "dsk", 8, 19, 0, EOF, EOF, EOF);
!!	repeat
!!		if readname (NM) then
!!			release (PACKCHAN);
!!			return
!!		;
!!		lookup (PACKCHAN, NM, EOF);
!!		if EOF then
!!			print ("Can't find it, try again: ")
!!	 until not EOF;
!!	fileinfo (INFO);
!!	OSIZE _ - (INFO[4] rot 18);	! Original file size;
!!ENDWAITS
!!TENEXONLY
!!	PACKCHAN _ openfile (null, "ROC");
!!	setinput (PACKCHAN, 0, EOF, EOF);
!!	if rfbsz (PACKCHAN)  36 then
!!	       print ("Input not packed.", );
!!		release (PACKCHAN);
!!		return
!!	;
!!	swdptr (PACKCHAN, -1);		! Point to end of file;
!!	OSIZE _ rwdptr (PACKCHAN);	! Get value;
!!	swdptr (PACKCHAN, 0);		! Back to beginning of file;
!!ENDTENEX

	if OSIZE = 1 then	! One-word file, special case;
		if (NONTEXT _ (SIZE _ (CODE _ wordin (PACKCHAN)) ash -7) < 0) and CODE land 1 = 0 then
		! Can't be all-identical 9-bit bytes with bottom bit zero;
			print ("Input not packed.", );
			release (PACKCHAN);
			return
		;
		if NONTEXT then
			SIZE _ -(SIZE land -4);
		print ("Special file (", SIZE, " identical", (if NONTEXT then
			" 9-bit" else null), " characters).", );
		if ASKOUT then
			release (PACKCHAN);
			return
		;
		if NONTEXT then
			FULLWORD _ (CODE land '777) * '1001;
			FULLWORD _ FULLWORD + (FULLWORD lsh 18);
			for I _ 1 thru SIZE div 4 do
				wordout (UNPKCHAN, FULLWORD)
		
		else
			S _ (if CODE land '177 = 0 then  else CODE);
			for I _ 1 thru SIZE do
				out (UNPKCHAN, S)
		;
		release (UNPKCHAN);
		release (PACKCHAN);
		return
	;

	THISWORD _ wordin (PACKCHAN);	NEXTWORD _ wordin (PACKCHAN);
	CHECKSUM _ THISWORD xor NEXTWORD;
	CODE _ BITPTR _ 0;
	TOTBITS _ inbits (18);		! Number of bits used, mod 2^18;
	USED _ OSIZE - 1;	! Last word not counting 0 at end;
	if (I _ (36 + USED*36 - TOTBITS) land '777777)  36 then
		USED _ 0	! Bits used inconsistent with file size;
	else
		TOTBITS _ USED*36 - I - 18;	! Useful bits remaining;
		USED _ inbits (18)	! So far so good, now read encoding;
	;
	BYTESIZE _ (if (NONTEXT _ USED land (1 lsh 17)) then 9 else 7);
	for BIT _ 1 thru MX do
		if USED land (1 lsh (BIT-1)) then
			HOWMANY _ inbits (BYTESIZE);
			for I _ 1 thru HOWMANY do
				CH _ inbits (BYTESIZE);
				SIZE _ ((MXBIT lsh 1) lsh -BIT);
				if CODE + SIZE > (MXBIT lsh 1) then
					print ("Input not packed.", );
					release (PACKCHAN);
					return
				;
				for J _ 0 thru SIZE-1 do
					TABLE[CODE+J] _ CH;
					SKIP[CODE+J] _ BIT
				;
				CODE _ CODE + SIZE
			
		;
	if CODE  (MXBIT lsh 1) then
		print ("Input not packed.", );
		release (PACKCHAN);
		return
	;

	if ASKOUT then			! Waited until now to avoid disaster if;
		release (PACKCHAN);	! file names given in wrong order;
		return
	;

	BRK _ FULLWORD _ S _ null;
	while TOTBITS > 0 do
		CH _ inbits (MX, false);	! "false" stops incr of bitptr;
		if NONTEXT then
			FULLWORD _ (FULLWORD lsh 9) lor TABLE[CH];
			if (BRK _ BRK + 1) = 4 then
				wordout (UNPKCHAN, FULLWORD);
				BRK _ FULLWORD _ 0
			
		
		else
			if length (S _ S & (if TABLE[CH]=0 then  else TABLE[CH]))  1000 then
				out (UNPKCHAN, S);
				S _ null
			;
		BITPTR _ BITPTR + SKIP[CH];
		TOTBITS _ TOTBITS - SKIP[CH]
	;
	CH _ inbits (0);	! Force it to read the last word for checksum;
	if TOTBITS < 0 then	! Oh dear, it didn't come out on the right bit;
		print ("Unpacked file is badly garbled.", )
	else if CHECKSUM then
		print ("Unpacked file is slightly garbled.", );

	if not NONTEXT then
		out (UNPKCHAN, S);

	release (PACKCHAN);	! Have to release (not just close) so that;
	release (UNPKCHAN)	! subsequent operations can getchan same chans;
;
comment HELP, DETAIL, OPTMESS;

procedure HELP;
	print ("
If you don't know what this program is for, type ! for details.
Command summary:
   P	Pack text file
   U	Unpack packed file
   V	enter Verbose mode
   T	enter Terse mode
   ?	print this
   !	print explanation
   O	explain Optimisation messages
   E	exit
");


procedure DETAIL;
	print ("
This program uses a Shannon  variable-length bit encoding to  condense
the contents of a text file.  Since the nature of the encoding is such
that the characters occurring most frequently in the original file are
encoded using the fewest bits, the ""packed"" file tends to require much
less disk space than the  original.  Typical savings range between  25
and 50%.  Larger  savings can  be expected for  files in  which a  few
characters are highly predominant.   Smaller savings (or even,  heaven
forbid, negative savings) may occur if  the original uses most of  the
Stanford Ascii character  set and  the characters  occur with  roughly
uniform distribution, in which case the overhead involved in recording
the encoding  pattern  tends  to exceed  the  savings  accrued.   This
overhead also acts to decrease the savings realised for smaller  files
in general.

The packing operation scans the input file twice; once to establish  a
character-frequency distribution,  and  once  to  perform  the  actual
packing.  Between  passes, the  sizes of  the old  and new  files  are
printed, along with the percentage saved.

The packed  file may  be  unpacked (thank  goodness) using  this  same
program.  The unpacking operation requires only a single pass  through
the file and  is very fast.   (Not that the  packing operation is  all
that slow, either!)", "

The commands V  and T  switch the  program between  Verbose and  Terse
modes.  (It starts out  in Terse mode.)  In  Verbose mode, it  reports
various statistics while packing,  to wit, the character  distribution
table,  the  number  of  bits  initially  chosen  to  represent   each
character, the amounts saved by optimising the encoding, and the final
encoding.  If you're curious as  to how to interpret the  optimisation
messages, use  the  command O.   Verbose  mode has  no  effect  during
unpacking.

Some notes:
[1] The packing operation  discards nulls and  line numbers, and  also
    removes an ETV directory if  one existed.  These are not  restored
    upon unpacking.
[2] If the file is not a text file (i.e., it has line-number words not
    in SOS format), the program offers to pack it anyway, using  9-bit
    bytes instead of 7-bit characters.  Unpacking still works, and the
    savings are often impressive, since most non-text files have large
    quantities of zero-bytes.
[3] With  the  exception  of  crlfs,  this  program  does  not  detect
    frequently-occurring sequences of characters; the packing is on  a
    strict character-by-character basis.  Hence the savings are not as
    great as  with  ""crunch"" and  ""spindle"".   (Type HELP  SPINDL  for
    details on these programs.)  Nor are packed files compatible  with
    the SPINDL program.  On the  other hand, this program is  somewhat
    less complicated to use, and it works on non-text files (which the
    other programs do not).
[4] Due to the density of information in packed files, any single  bit
    being incorrect  due to  I/O  errors or  the  like can  cause  the
    unpacking operation to yield utter garbage.  Usually, however, the
    program gets back in step fairly quickly, so only a few characters
    are actually munged.  The program puts  out a checksum at the  end
    of the file to help it detect when such an error has occurred.
");


procedure OPTMESS;
	print ("
If you are in Verbose mode, the  following 2 types of messages may  be
printed out while the bit-encoding is being optimised.
	Tweaking x and y inward saves z bits.
	Tweaking x outward saves z bits.
In the first case,  it means that the  least frequent character  which
was being  represented by  x bits  is being  changed to  an  (x+1)-bit
representation, and  the  most  frequent  2^(y-x-1)  characters  being
represented by y bits are being  changed to y-1.  In the second  case,
it means that the most frequent character being represented by x  bits
is being changed to x-1, and the 2 least frequent are being changed to
x+1.  In either case, the change causes  a reduction of z bits in  the
size of the packed file.  z may equal 0 only for inward tweaks.
");
comment Main program;

CHAR _ VERBOSE _ 0;
setformat (0, 2);
while CHAR  "e" do
	case CHAR of 
	["p"]	pack;
	["u"]	unpack;
	["v"]		VERBOSE _ true;
			print ("erbose mode.")
		;
	["t"]		VERBOSE _ false;
			print ("erse mode.")
		;
	["?"]	help;
	["!"]	detail;
	["o"]	optmess;
	else	print ("Type ? for help")
	;
	print (, "*");
	CHAR _ inchrw lor '40
;
print ("xit.", )

end "PACK"