Google
 

Trailing-Edge - PDP-10 Archives - decuslib10-07 - 43,50433/parse.pas
There are 2 other files named parse.pas in the archive. Click here to see a list.
(*$M-,C-,D-*)
TYPE
  SETOFCHAR = SET OF CHAR;
{a THING is a declaration used for objects that require conversion
 between SIXBIT, half words, and fullwords.}
  THING=RECORD
	CASE INTEGER OF
	1:(SIXBIT:PACKED ARRAY[1:6] OF 0..77B);
	2:(HALF:PACKED ARRAY[1:2] OF 0..777777B);
	3:(FULL:INTEGER)
	END;
{this is a string of characters, typically a file spec. Note that the
 actual string is variable length. The 150 upper bound is not actually
 enforced. See below.}
  STRING=PACKED ARRAY[1:150] OF CHAR;
{a BLOCK is a file control block, the internal data structure representing
 the state of a file.  For the structure of this block, see PASUNV.MAC}
  BLOCK=ARRAY[0:32B] OF THING;
VAR
  DELIMS,OCTALS:SETOFCHAR;

INITPROCEDURE;
 BEGIN
 DELIMS:=[':','.','[',',',']'];
 OCTALS:=['0','1','2','3','4','5','6','7'];
 END;

{PARSE is the main file name parser. It is a finite state machine.
    B - file control block. See PASUNV.MAC.  In assembly language,
	you should pass the address of the control block in AC 2 (REG)
    N - file name.  This is a string of arbitrary length.  Note that
	error checking is turned off, so bounds are not checked on
	this array.  So the code below must be very careful to handle
	the bounds properly.  This is the only way to handle arbitrary
	length strings in Pascal.  In assembly language, put the
	address of the string in AC 3 (REG1).
    COUNT - length of the string N.  The compiler supplies this in
	AC 4 (REG2) for RESET, REWRITE, etc.  Also the pseudo-type
	STRING will cause a string to be tranmitted in one AC and
	the length in the next.  Be very careful not to refer to
	anything before N[COUNT], as bounds checking is turned off.
  This routine parses a standard DEC10 file spec and puts the resulting
  sixbit stuff in the appropriate places in the file control block.
}
PROCEDURE PARSE(VAR B:BLOCK; VAR N:STRING; COUNT:INTEGER);
VAR
  ITEM:ALFA;  {Current atom, returned by GETSYM}
  BO,DOINGOCTAL,DONE:BOOLEAN;
  LAST,TOTAL,I,OCT:INTEGER;
  DUMMY:BLOCK;  {We build up the block in DUMMY.  If there is an error,
		 we do not want to have changed the real thing.}
  CH:CHAR;
  IPT,PT:INTEGER;  {PT - next character to be scanned, index into N
		    IPT - local pointer, usually index into a string
			  that is being filled}

PROCEDURE GETSYM;

{GETOCT is a subprocedure used by GETSYM to handle the syntax
 #octal digits.  As in PIP, we allow this to specify otherwise
 untypable names, e.g. #2020000033.UFD.  Reads 2 digits and
 returns the character (presumably untypable) that they
 describe in sixbit.  If just one digit there, assume 0 first
 digit. The virtual # referred to below is if there are more than
 2 digits.  Then we assume a # after the first two.}
FUNCTION GETOCT:CHAR;
%CALLED WITH PT POINTING AT # (REAL OR VIRTUAL). GETS UP TO 2
FOLLOWING OCTAL DIGITS.  LEAVES PT POINTING AT LAST CHAR GOT\
 BEGIN
 OCT:=0;
 IF(PT+1)<=LAST {PT+1 used because PT is the #}
  THEN IF N[PT+1] IN OCTALS
   THEN BEGIN
   OCT := 8 * (ORD(N[PT+1]) - 60B);
   PT := PT+1;
   IF ((PT+1) <= LAST)
    THEN IF (N[PT+1] IN OCTALS)
     THEN BEGIN
     OCT := OCT + (ORD(N[PT+1]) - 60B);
     PT := PT+1;
     END
   END;
 GETOCT := CHR (OCT + 40B);
 END;

{GETSYM gets the next atom and puts it in ITEM.  An atom is anything
 between delimiters, or a delimiter.  So each delimiter will be returned
 as a separate atom.  Updates PT to point to the character after the
 last one scanned.  This routine truncates at 6 characters, but scans
 more if they are there (usual DEC filename syntax).}
  BEGIN %GETSYM\
  ITEM:='          ';
  IPT:=1;	{IPT is pointing into ITEM.  Where the output is put.}
  CH:=' ';
  DOINGOCTAL:=FALSE;
{skip blanks and nulls}
  WHILE(PT<=LAST) AND ((N[PT]=' ') OR (ORD(N[PT])=0))
    DO PT:=PT+1;
{scan until come to end of string or a delimiter}
  WHILE(PT<=LAST) AND NOT(CH IN DELIMS) DO
    BEGIN
     %DOINGOCTAL IS ON IF CHAR AT PT SHOULD BE INTERPRETED AS OCTAL\
    IF NOT (N[PT] IN OCTALS) THEN DOINGOCTAL := FALSE;
	{map lower case to upper, turn ctl characters into blanks}
    CH:=N[PT]; IF ORD(CH)>=140B THEN CH:=CHR(ORD(CH)-40B);
    IF ORD(CH)<40B THEN CH:= ' ';
	{if octal, make sure DOINGOCTAL is on and PT is pointing to
	 real or virtual #}
    IF CH='#'
     THEN DOINGOCTAL := TRUE
      %GETOCT WILL START LOOKING AT PT+1, LEAVE PT AT LAST IT SAW\
     ELSE IF DOINGOCTAL
      THEN PT := PT-1;
       %GETOCT EXPECT PT IS #, SO THIS MAKES IT START LOOKING AT
        PT.  WE HAVE VERIFIED PT IS OCTAL (FIRST LINE OF ROUTINE), SO
        GETOCT WILL AT LEAST GOBBLE A DIGIT AT PT\
	 {copy character to ITEM unless already 6}
    IF IPT<=6
     THEN IF DOINGOCTAL
      THEN ITEM[IPT]:=GETOCT
      ELSE ITEM[IPT]:=CH;
    PT:=PT+1;  IPT:=IPT+1;
    END;
  IF PT>LAST THEN DONE:=TRUE;
	{if we have a delimiter and IPT is 2, that means the delimiter
	 was the first thing we saw.  In that case, we want to return
	 just the delimiter.  If IPT > 2, then it is a delimiter that
	 marks the end of a normal atom, and we save the delimiter
	 for next time}
  IF (CH IN DELIMS) AND (IPT>2)
    THEN BEGIN IF IPT<=7 THEN ITEM[IPT-1]:=' '; IPT:=IPT-1; PT:=PT-1; DONE:= FALSE END;
  END;

{CVSIX take the item that is in ITEM and puts it in offset INDEX in the
 file control block we are building up.  SIXBIT conversion is done.}
PROCEDURE CVSIX(INDEX:INTEGER);
  BEGIN
  FOR IPT:=1 TO 6 DO
  BEGIN
    DUMMY[INDEX].SIXBIT[IPT]:=ORD(ITEM[IPT])-40B;
END;
  END;

{CVINT takes the item that is in ITEM and puts it in the reference
 variable I.  It assumes ITEM is the character representation of an
 octal number.}
function cvint(var i:integer):Boolean;
  BEGIN
  TOTAL:=0;
  i:=0; cvint:= true;
  for ipt:= 1 to ipt-1 do
    if item[ipt] in octals
      then i:=i*8 + ord(item[ipt])-60b
      else cvint := false;
  END;

{Beginning of main scanner logic}
BEGIN
	{Initialization}
DONE:=FALSE;
LAST:=COUNT;
PT:=1;IPT:=1;
	{These are default values for the file control block}
b[1].full:=1;
DUMMY[14B].FULL:=0;
DUMMY[15B].FULL:=0;
DUMMY[12B].HALF[1]:=446353B;  {DSK}
DUMMY[12B].HALF[2]:=0;
FOR I:=20B TO 20B+5 DO
 DUMMY[I].FULL:=0;
	{Here is the beginning of the finite state machine}
{Note that this code is as careful as possible about syntax errors,
 so whenever a name is expected, you have to check to be sure you
 didn't get a delimiter instead, or end of string.}
	{State 0: parse the first atom, file name or device}
GETSYM;
if item[1] in delims then goto 6;
CVSIX(14B);  {use as file name.  this is OK even if it a device name,
		as it gives us a default file name.  So "TTY:" will
		actually result in "TTY:TTY".  This is good so we
		have a file name in case of ASS DSK TTY}
IF DONE THEN GOTO 5;
GETSYM;	     {get the delimiter}
IF ITEM[1]=':'  THEN GOTO 1
ELSE IF ITEM[1]='.' THEN GOTO 2
ELSE IF ITEM[1]='[' THEN GOTO 3
ELSE GOTO 6;

	{State 1: a device name has just been parsed.  It was
		  interpreted as a file name, so copy to device
		  name field.  If there is another atom, it is
		  unambiguously a file name.}
1:
DUMMY[12B]:=DUMMY[14B];
IF DONE THEN GOTO 5;
GETSYM;	   {get next atom. If valid, it will be a file name}
IF(ITEM[1] IN DELIMS) THEN GOTO 6;
CVSIX(14B);
IF DONE THEN GOTO 5;
GETSYM;	   {this is the delimiter after the file name}
IF ITEM[1]='.' THEN GOTO 2
ELSE IF ITEM[1]='[' THEN GOTO 3
ELSE GOTO 6;

	{State 2:  here after file name and dot.  Can be extension, PPN, or
		   end of string}
2:
IF DONE THEN GOTO 5;
GETSYM;	{next atom: PPN, extension, or junk}
IF(ITEM[1]='[') THEN GOTO 3;  {PPN}
IF(ITEM[1] IN DELIMS) THEN GOTO 6; {junk}
CVSIX(15B);  {extension: put it in extension field}
IF DONE THEN GOTO 5;
GETSYM;   {delimiter after extension}
IF(ITEM[1]='[') THEN GOTO 3
ELSE GOTO 6;

	{State 3: here after a [.  Had better be a PPN of some sort}
3:
CALLI(24B,0,0,I,BO);  {get our PPN}
DUMMY[20B].FULL:=I;   {supply our PPN as default, in case of [,]}
GETSYM;  {now get an atom.  Can be project number or comma if [,]}
IF(ITEM[1]=',') THEN  {if no proj number, we already have default}
ELSE IF(ITEM[1] IN DELIMS) THEN GOTO 6 {junk}
ELSE BEGIN
if cvint(i)
  then dummy[20b].half[1]:=i    {put proj number where it goes}
  else goto 6;
GETSYM;  	{comma after proj number - must be there}
IF NOT(ITEM[1] = ',') THEN GOTO 6;
END;
GETSYM;  {we have now seen [proj,   This is prgr num
	  or ] in case of [,], or comma in case of [,,SFD]}
IF(ITEM[1] =']') THEN GOTO 5
ELSE IF(ITEM[1]=',') THEN GOTO 4
ELSE IF (ITEM[1] IN DELIMS) THEN GOTO 6
ELSE BEGIN
if cvint(i)  {here if we have a real prgrm nummber - put it away}
  then dummy[20b].half[2]:=i
  else goto 6;
GETSYM;  {and get delimiter after prgm number - , or ]}
IF ITEM[1]=']' THEN GOTO 5
ELSE IF ITEM[1]=',' THEN GOTO 4
ELSE GOTO 6;
END;

	{State 4 - we have seen the , after a PPN - scan for SFD's}
4:
FOR I:=1 TO 5 DO
  BEGIN
  GETSYM;  {this is an SFD}
  IF (ITEM[1] IN DELIMS) THEN GOTO 6;
  CVSIX(20B+I);
  GETSYM;  {this is comma if more SFD's, or ] if no more}
  IF ITEM[1]=']' THEN GOTO 5
  ELSE IF ITEM[1] # ',' THEN GOTO 6
  END;  {normal exit from this loop is finding ] and going to 5}
GOTO 6;  {didn't terminate with ]}

	{State 5 - finished valid scan}
5:
{now we can copy the data into the real control block, since we know
 it is valid}
B[12B]:=DUMMY[12B];
B[14B]:=DUMMY[14B];
B[15B]:=DUMMY[15B];
FOR I:=20B TO 20B+5 DO
 B[I]:=DUMMY[I];
 b[1].full:=0;

6:
{exit - note that going directly here results in an error.  The caller
 tests EOF to see if this worked.  EOF is initialized to true, and is
 only set false in 5 above.}
END.