Google
 

Trailing-Edge - PDP-10 Archives - decuslib10-08 - 43,50476/daverb.for
There are 2 other files named daverb.for in the archive. Click here to see a list.
      SUBROUTINE DAVERB(LOWWRD,MAXWRD,IWORD ,LOWKNT,MAXKNT,
     1    KNTLTR,IBUFFR,MAXBFR,LOWBFR,KIND  ,MATCH ,LCNWRD,
     2    LCNKNT,LCNBFR)
C     RENBR(/IDENTIFY WORDS OR ABBREVIATIONS)
C
C     DONALD BARTH, HARVARD BUSINESS SCHOOL
C
C     DAVERB  INTERPRETS  AN  ARRAY  READ  BY  THE  CALLING
C     PROGRAM   WITH   A  MULTIPLE  OF  AN  A1  FORMAT  AND
C     IDENTIFIES THE WORDS AND WORD ABBREVIATIONS CONTAINED
C     IN  THIS  ARRAY.   THE  WORDS ARE RECOGNIZED BY BEING
C     MATCHED AGAINST A USER DEFINED  DICTIONARY.   IF  THE
C     ARRAY   CONTAINS   ABBREVIATIONS   OF  WORDS  IN  THE
C     DICTIONARY, THEN DAVERB ALSO SPECIFIES WHETHER  THESE
C     ABBREVIATIONS ARE AMBIGUOUS.
C
C     ARGUMENT LIST DEFINITIONS:
C
C     LOWBFR IS USED  FOR BOTH  INPUT  AND OUTPUT.    KIND,
C     MATCH, LCNWRD,  LCNKNT AND  LCNBFR ARE  USED ONLY FOR
C     OUTPUT.  REMAINING ARGUMENTS ARE USED ONLY FOR INPUT.
C
C     LOWWRD = SUBSCRIPT  OF LOCATION IN  IWORD ARRAY WHICH
C              CONTAINS 1ST LETTER OF 1ST WORD.   NOTE THAT
C              IF KNTLTR(LOWKNT) IS NEGATIVE,  THEN THE 1ST
C              LETTER OF  1ST WORD  WILL BE  FOUND IN ARRAY
C              LOCATION IWORD(LOWWRD-KNTLTR(LOWKNT)).
C     MAXWRD = DIMENSION OF IWORD ARRAY.
C     IWORD  = DICTIONARY ARRAY  CONTAINING  CHARACTERS  OF
C              WORDS  TO  BE  RECOGNIZED,  1  CHARACTER PER
C              ARRAY LOCATION AS READ BY A1 FORMAT OR  ELSE
C              DEFINED BY 1H FIELD.  SECTIONS OF A WORD CAN
C              BE ABBREVIATED AND/OR SEPARATED BY SPACES OR
C              TABS  IF THE WORD IN IWORD CONTAINS A SINGLE
C              SPACE BETWEEN EACH SUCH SECTION AND  IF  THE
C              LENGTH  STORED  IN  THE  KNTLTR ARRAY IS 100
C              MORE THAN THE ACTUAL LENGTH  (INCLUDING  THE
C              SPACES).   ALL  LETTERS  IN  THE IWORD ARRAY
C              MUST BE UPPER CASE.
C     LOWKNT = SUBSCRIPT OF  KNTLTR ARRAY LOCATION DEFINING
C              LENGTH OF FIRST WORD WHICH CAN BE MATCHED IN
C              THE IWORD ARRAY.  THIS FIRST WORD WILL START
C              AT  IWORD(LOWWRD).   IF NO  WORDS ARE  TO BE
C              RECOGNIZED,  THEN  EITHER  MAXKNT  SHOULD BE
C              LESS  THAN LOWKNT,  OR ELSE  BOTH LOWKNT AND
C              MAXKNT CAN POINT  TO THE SAME ZERO  ENTRY IN
C              THE KNTLTR ARRAY.
C     MAXKNT = SUBSCRIPT OF  KNTLTR ARRAY LOCATION DEFINING
C              LENGTH OF FINAL WORD WHICH CAN BE MATCHED IN
C              THE IWORD ARRAY.
C     KNTLTR = ARRAY CONTAINING THE NUMBERS  OF  CHARACTERS
C              IN  THE WORDS IN THE IWORD ARRAY.  A ZERO OR
C              NEGATIVE VALUE IN THE KNTLTR  ARRAY  OFFSETS
C              THE  NEXT POSSIBLE WORD WHICH CAN BE MATCHED
C              IN THE IWORD ARRAY BY THE NUMBER OF  LETTERS
C              GIVEN  BY THE ABSOLUTE VALUE OF THE NEGATIVE
C              NUMBER IN THE KNTLTR  ARRAY.   DIMENSION  OF
C              KNTLTR MUST BE AT LEAST MAXKNT.  FOR EXAMPLE
C              TO RECOGNIZE THE WORDS
C
C                   YES, NO, MAYBE
C
C              THE CONTENTS OF THE IWORD ARRAY WOULD BE
C
C                   1HY,1HE,1HS,1HN,1HO,1HM,1HA,1HY,1HB,1HE
C
C              AND CONTENTS OF THE KNTLTR ARRAY WOULD BE
C
C                    3,2,5
C
C              IF A  WORD  IN  THE   IWORD  ARRAY  CONTAINS
C              EMBEDDED  SPACES,  THEN 100 MUST BE ADDED TO
C              THE LENGTH  STORED  FOR  THIS  WORD  IN  THE
C              KNTLTR  ARRAY  TO  ALLOW  THE PORTION OF THE
C              WORD  LEFT OF  THE SPACE TO  BE ABBREVIATED.
C              VALUES 101 THROUGH  199 IN KNTLTR ARRAY THUS
C              INDICATE WORDS CONTAINING  SPACES WHICH HAVE
C              LENGTHS  OF  1 THROUGH 99 RESPECTIVELY.  THE
C              VALUE 100 IN THE KNTLTR ARRAY IS TREATED THE
C              SAME AS A ZERO.
C     IBUFFR = INPUT  BUFFER  ARRAY  CONTAINING  CHARACTERS
C              TYPED BY USER,  READ BY A  MULTIPLE OF AN A1
C              FORMAT, WHICH IS  TO BE SEARCHED  FOR WORDS.
C              IBUFFR THEN  CONTAINS 1  LETTER PER COMPUTER
C              STORAGE  LOCATION.   LETTERS  IN THE  IBUFFR
C              ARRAY CAN BE EITHER UPPER OR LOWER CASE.
C     MAXBFR = MAXIMUM SUBSCRIPT  OF  IBUFFR  ARRAY  TO  BE
C              SEARCHED
C     LOWBFR = SUBSCRIPT WITHIN THE  IBUFFR  ARRAY  OF  THE
C              FIRST  (LEFTMOST)  CHARACTER  WHICH  CAN  BE
C              SCANNED FOR WORDS.  LOWBFR WILL BE  RETURNED
C              POINTING  TO THE  NEXT  CHARACTER  BEYOND  A
C              MATCHED  WORD  IF A WORD IS FOUND.  IF THERE
C              IS NOTHING AT OR TO RIGHT  OF  LOWBFR,  THEN
C              LOWBFR WILL BE LEFT POINTING AT MAXBFR+1 AND
C              KIND  WILL  BE  RETURNED   CONTAINING   ONE.
C              LOWBFR MUST BE SET BY CALLING PROGRAM BEFORE
C              ANYTHING IS  PROCESSED IN  CURRENT  CONTENTS
C              OF  THE IBUFFR ARRAY, BUT THEN SHOULD NOT BE
C              MODIFIED BY CALLING PROGRAM UNTIL THE ENTIRE
C              CONTENTS OF IBUFFR ARRAY HAS BEEN PROCESSED.
C     KIND   = RETURNED DESCRIBING THE KIND OF ITEM LOCATED
C              IN THE IBUFFR ARRAY.
C            = 1, NOTHING  WAS FOUND AT OR  TO THE RIGHT OF
C              LOWBFR.  THE  CALLING  PROGRAM SHOULD READ A
C              NEW LINE INTO IBUFFR.
C            = 2, ACCEPTABLE  WORD OR  ABBREVIATION THEREOF
C              WAS NOT FOUND,  BUT A PRINTING CHARACTER WAS
C              FOUND WHICH  DOES NOT BEGIN  ANY WORD IN THE
C              DICTIONARY.   LOWBFR IS RETURNED POINTING TO
C              THIS PRINTING CHARACTER.
C            = RETURNED  CONTAINING 3, 4  OR 5 IF A WORD IN
C              THE DICTIONARY  WAS MATCHED  EVEN PARTIALLY.
C              FOR EXAMPLE, IF DICTIONARY CONTAINED BOTH OF
C              THE WORDS NO AND NONE, THEN
C              A) INITIAL LETTER N  IN THE BUFFER  FOLLOWED
C                 BY SOME CHARACTER OTHER THAN THE LETTER O
C                 WOULD BE  AMBIGUOUS  ABBREVIATION AND THE
C                 POINTER  NAMED  MATCH  WOULD BE  RETURNED
C                 POINTING TO  (CONTAINING  SEQUENCE NUMBER
C                 WITHIN DICTIONARY  OF) WHICHEVER  WORD NO
C                 OR NONE APPEARED FIRST IN THE DICTIONARY.
C              B) INITIAL LETTERS N AND  O FOLLOWED BY SOME
C                 CHARACTER OTHER  THAN THE  LETTER N WOULD
C                 BE AN EXACT MATCH WITH THE WORD NO.
C              C) INITIAL LETTERS N AND  O AND N WOULD BE A
C                 PARTIAL  BUT NONAMBIGUOUS MATCH  WITH THE
C                 WORD NONE.
C              LEADING SPACES AND/OR  TABS ARE IGNORED.   A
C              STRING  OF  CHARACTERS  CONTAINING  EMBEDDED
C              SPACES AND/OR  TABS CAN MATCH  A WORD IN THE
C              DICTIONARY  ONLY IF  THE WORD  IN DICTIONARY
C              CONTAINS A  SINGLE SPACE AT  THE POSITION AT
C              WHICH  THE SPACES AND/OR TABS   ARE  ALLOWED
C              (BUT NOT NECESSARY).
C            = 3, A WORD IN THE  IWORD  ARRAY  WAS  MATCHED
C              EXACTLY.   MATCH  IS RETURNED CONTAINING THE
C              SEQUENCE NUMBER OF THE WORD MATCHED  IN  THE
C              IWORD ARRAY.
C            = 4, A NONAMBIGUOUS ABBREVIATION OF A WORD  IN
C              THE   IWORD   ARRAY  WAS  FOUND.   MATCH  IS
C              RETURNED CONTAINING THE SEQUENCE  NUMBER  OF
C              THE WORD IN THE IWORD ARRAY.
C            = 5, AN AMBIGUOUS ABBREVIATION OF A  WORD  WAS
C              FOUND.   MATCH  IS  RETURNED  CONTAINING THE
C              SEQUENCE NUMBER OF THE FIRST WORD MATCHED IN
C              THE IWORD ARRAY.
C     MATCH  = RETURNED CONTAINING THE SEQUENCE NUMBER OF A
C              WORD  MATCHED  IN THE IWORD ARRAY IF KIND IS
C              RETURNED CONTAINING 3, 4 OR 5.  FOR EXAMPLE,
C              IF  THE  SECOND  WORD IS MATCHED, THEN MATCH
C              WOULD  BE  RETURNED   CONTAINING   2.    THE
C              SEQUENCE  NUMBER  OF  THE  WORD IN THE IWORD
C              ARRAY DOES NOT INCLUDE THE  LETTERS  SKIPPED
C              OVER  BY  THE  VALUE OF LOWWRD, AND DOES NOT
C              INCLUDE THE LETTERS SKIPPED OVER BY NEGATIVE
C              VALUES  ENCOUNTERED  IN  THE  KNTLTR  ARRAY.
C              MATCH IS RETURNED CONTAINING KIND-2 IF  KIND
C              IS RETURNED .LE.2 INDICATING THAT NO WORD IN
C              THE  IWORD  ARRAY  COULD  BE  MATCHED   EVEN
C              PARTIALLY.   THIS MEANS  THAT IF THE CALLING
C              PROGRAM  TESTS FOR  KIND=5 AFTER  THE RETURN
C              FROM DAVERB, AND IF KIND=4 IS TO BE TAKEN AS
C              EQUIVALENT  TO KIND=3,  THEN CALLING PROGRAM
C              CAN ADD 2 TO THE VALUE OF MATCH AND USE THIS
C              SUM AS INDEX FOR A COMPUTED GO TO STATEMENT.
C     LCNWRD = IF KIND IS RETURNED CONTAINING 3 OR GREATER,
C              LCNWRD IS  RETURNED WITH  SUBSCRIPT OF IWORD
C              LOCATION CONTAINING FIRST  LETTER OF MATCHED
C              WORD.
C     LCNKNT = IF KIND IS RETURNED CONTAINING 3 OR GREATER,
C              LCNKNT IS RETURNED  WITH SUBSCRIPT OF KNTLTR
C              LOCATION CONTAINING THE WORD LENGTH.
C     LCNBFR = IF KIND IS RETURNED CONTAINING 3 OR GREATER,
C              INDICATING  THAT  A WORD OR ITS ABBREVIATION
C              WAS   FOUND,   THEN   LCNBFR   IS   RETURNED
C              CONTAINING THE SUBSCRIPT OF THE IBUFFR ARRAY
C              LOCATION WHICH CONTAINS THE FIRST  CHARACTER
C              OF THE WORD OR ITS ABBREVIATION.
C
C     KONVRT IS A LOCAL ARRAY HOLDING  THE  UPPER  CASE
C     VERSIONS OF UP TO THE FIRST 10 PRINTING CHARACTERS
C     OF THE ITEM BEING IDENTIFIED SO THAT THESE NEED NOT
C     BE CONVERTED AGAIN FOR EACH WORD IN THE DICTIONARY.
C
      DIMENSION IBUFFR(MAXBFR),IWORD(MAXWRD),
     1KNTLTR(MAXKNT),KONVRT(10),KAPITL(26),LOWER(26)
C
C     CCCC       AAA UU   UU TTTTTTTT IIII  OOOO   NN    NN
C   CC          AAAA UU   UU    TT     II  OO  OO  NNN   NN
C  CC          AA AA UU   UU    TT     II OO    OO NNNN  NN
C  CC         AA  AA UU   UU    TT     II OO    OO NN NN NN
C  CC        AAAAAAA UU   UU    TT     II OO    OO NN  NNNN
C   CC      AA    AA  UU UU     TT     II  OO  OO  NN   NNN
C     CCCC AA     AA   UUU      TT    IIII  OOOO   NN    NN
C
C     TO CONVERT LOWER  CASE  LETTERS  IN  THE  INPUT  TEXT
C     BUFFER  INTO  UPPER CASE LETTERS WHICH CAN BE MATCHED
C     AGAINST THE DICTIONARY,  THIS  ROUTINE  COMPARES  THE
C     CHARACTERS IN THE INPUT TEXT BUFFER AGAINST THE LOWER
C     CASE LETTERS IN THE LOWER ARRAY.  THE LETTERS IN  THE
C     LOWER  ARRAY MUST BE ARRANGED IN INCREASING NUMERICAL
C     ORDER.  IF THE NUMERICAL ORDER IS NOT THE SAME AS THE
C     ALPHABETICAL   ORDER,   THEN   THE   DATA  STATEMENTS
C     APPEARING BELOW MUST BE CHANGED OR ELSE SOME  OR  ALL
C     LOWER  CASE LETTERS IN THE INPUT TEXT BUFFER WILL NOT
C     BE TREATED AS EQUIVALENT TO THE  CORRESPONDING  UPPER
C     CASE  LETTERS.   ONCE THE LETTERS IN THE  LOWER ARRAY
C     ARE SORTED INTO INCREASING NUMERICAL ORDER, THE UPPER
C     CASE LETTERS IN THE KAPITL ARRAY SHOULD BE REARRANGED
C     SO THAT LOWER AND UPPER CASE VERSIONS OF EACH  LETTER
C     APPEAR  IN  LOCATIONS  IN THE LOWER AND KAPITL ARRAYS
C     HAVING THE SAME SUBSCRIPTS.
C
C     IF THE COMPUTER UPON WHICH THIS ROUTINE IS USED  DOES
C     NOT  SUPPORT  LOWER CASE LETTERS, THEN BOTH THE LOWER
C     AND KAPITL ARRAYS CAN CONTAIN THE LETTERS 1HA THROUGH
C     1HZ  IN  ALPHABETICAL  ORDER (EVEN IF THIS IS NOT THE
C     NUMERICALLY SORTED ORDER).
C
C     KAPITL = UPPER CASE LETTERS A THROUGH Z SORTED ON
C              LOWER ARRAY
      DATA KAPITL/
     11HA,1HB,1HC,1HD,1HE,1HF,1HG,1HH,1HI,1HJ,
     21HK,1HL,1HM,1HN,1HO,1HP,1HQ,1HR,1HS,1HT,
     31HU,1HV,1HW,1HX,1HY,1HZ/
C
C     LOWER  = LOWER CASE LETTERS A THROUGH Z SORTED INTO
C              NUMERICALLY INCREASING ORDER
      DATA LOWER/
     11Ha,1Hb,1Hc,1Hd,1He,1Hf,1Hg,1Hh,1Hi,1Hj,
     21Hk,1Hl,1Hm,1Hn,1Ho,1Hp,1Hq,1Hr,1Hs,1Ht,
     31Hu,1Hv,1Hw,1Hx,1Hy,1Hz/
C
C     IBLANK = THE BLANK OR SPACE CHARACTER
C     ITAB   = TABULATION  CHARACTER,  THIS CAN BE REPLACED
C              BY SPACE IF TAB CHARACTER IS NOT AVAILABLE
      DATA IBLANK,ITAB/1H ,1H	/
C
C     SEARCH FOR FIRST PRINTING CHARACTER.  LOWBFR ITSELF IS
C     ADVANCED OVER LEADING SPACES AND TABS.  IF THE END  OF
C     THE  BUFFER  IS  PASSED, STATEMENT 29 RETURNS KIND=1.
      GO TO 2
    1 LOWBFR=LOWBFR+1
    2 IF(LOWBFR.GT.MAXBFR)GO TO 29
      NOWLTR=IBUFFR(LOWBFR)
      IF(NOWLTR.EQ.IBLANK)GO TO 1
      IF(NOWLTR.EQ.ITAB)GO TO 1
C
C     SET INITIAL CONSTANTS IF FIND PRINTING CHARACTER
C     LMTBFR = HIGHEST BUFFER SUBSCRIPT WHICH CAN BE TESTED
C     IEND   = IWORD SUBSCRIPT AT WHICH NEXT WORD BEGINS
C     MSTSAM = 1 MEANS NO LETTERS HAVE YET BEEN MATCHED
C     KNTKNV = NUMBER OF CHARACTERS SAVED IN KONVRT ARRAY
C     KNTWRD = KNTLTR SUBSCRIPT OF WORD BEING TESTED
C     INDEX  = SEQUENCE NUMBER OF WORD BEING TESTED
      LMTBFR=MAXBFR
      LCNBFR=LOWBFR
      IEND=LOWWRD
      MSTSAM=1
      KNTKNV=0
      KNTWRD=LOWKNT-1
      INDEX=0
C
C     STATEMENT 3 RESETS IEXACT TO 1 (NO MATCH OR AMBIGUOUS)
C     BEFORE ADVANCING.  STATEMENT 4 ADVANCES TO NEXT ENTRY
C     IN THE KNTLTR ARRAY LEAVING IEXACT UNCHANGED.
    3 IEXACT=1
    4 KNTWRD=KNTWRD+1
      IF(KNTWRD.GT.MAXKNT)GO TO 28
C
C     GET NEXT WORD IN DICTIONARY
C     THE 100 WHICH MARKS A WORD CONTAINING SPACES IS REMOVED
C     FROM THE LENGTH.   A  ZERO  OR  NEGATIVE LENGTH IS NOT
C     A WORD, AND IS USED AT STATEMENT 27 TO SKIP LETTERS.
C     NOTE(REVIEW):  THE  100  FLAG  IS  ONLY REMOVED HERE.
C     KEND IS RESET TO ZERO BELOW, AND NO LATER STATEMENT IN
C     THIS ROUTINE TESTS WHETHER THE FLAG HAD BEEN PRESENT.
      JEND=KNTLTR(KNTWRD)
      KEND=JEND-100
      IF(KEND.GE.0)JEND=KEND
      IF(JEND.LE.0)GO TO 27
C     NXTCMP POINTS TO FIRST LETTER OF THIS WORD IN IWORD AND
C     IEND TO THE LOCATION BEYOND ITS FINAL LETTER.   JEXACT
C     STAYS -1 ONLY WHILE THE WORD IS BEING MATCHED EXACTLY.
      KEND=0
      NXTCMP=IEND
      IEND=IEND+JEND
      JEXACT=-1
      INDEX=INDEX+1
      NXTBFR=LOWBFR
      NOWSAM=1
C
C     GET NEXT CHARACTERS TO BE COMPARED
C     A SPACE OR TAB IN THE BUFFER IS HANDLED AT STATEMENT 15.
C     A CHARACTER ALREADY CONVERTED WHILE TESTING AN EARLIER
C     WORD IS TAKEN FROM THE KONVRT ARRAY AT STATEMENT 13.
    5 IF(NXTBFR.GT.LMTBFR)GO TO 22
      KOMPAR=IBUFFR(NXTBFR)
      IF(KOMPAR.EQ.IBLANK)GO TO 15
      IF(KOMPAR.EQ.ITAB)GO TO 15
      IF(NOWSAM.LE.KNTKNV)GO TO 13
C
C     DETERMINE UPPER CASE  VERSION OF A LOWER CASE LETTER.
C     THIS IS A TERNARY SEARCH TAKING ADVANTAGE OF THE SIZE
C     OF  ALPHABET BEING NEARLY 3**3.  THE 3RD OF THE ARRAY
C     CONTAINING THE DESIRED LETTER IS FIRST LOCATED,  THEN
C     THE  3RD  OF  THIS  3RD,  AND  FINALLY  EACH  OF  THE
C     REMAINING 3  LETTERS  ARE  TESTED  INDIVIDUALLY.   TO
C     PREVENT  TESTING  AGAINST  THE 27TH LETTER WHICH DOES
C     NOT EXIST, UPPER 3RD  IS  TAKEN  AS  UPPER  9  SORTED
C     LETTERS,  RATHER THAN FROM 19TH THROUGH 27TH LETTERS,
C     SO THAT LOWER(18) IS TESTED AGAINST IN UPPER 3RD EVEN
C     THOUGH LETTER BEING MATCHED HAS ALREADY BEEN FOUND TO
C     BE LARGER THAN THIS.
C
C     KUT IS SET TO THE TOP OF A GROUP OF 3 LETTERS.   THE
C     STARTING VALUES ARE 3, 12 AND 20 FOR THE LOWER, MIDDLE
C     AND UPPER 3RDS.  A CHARACTER OUTSIDE THE RANGE LOWER(1)
C     THROUGH LOWER(26), OR NOT EQUAL TO ANY OF THE 3 LETTERS
C     TESTED, GOES TO STATEMENT 11 WITHOUT BEING CHANGED.
      IF(KOMPAR.GT.LOWER(18))GO TO 7
      IF(KOMPAR.GT.LOWER(9))GO TO 6
      IF(KOMPAR.LT.LOWER(1))GO TO 11
      KUT=3
      GO TO 8
    6 KUT=12
      GO TO 8
    7 IF(KOMPAR.GT.LOWER(26))GO TO 11
      KUT=20
C     SELECT THE GROUP OF 3 WITHIN THE 3RD
    8 IF(KOMPAR.LE.LOWER(KUT))GO TO 9
      KUT=KUT+3
      IF(KOMPAR.GT.LOWER(KUT))KUT=KUT+3
C     TEST LETTERS AT KUT, KUT-1 AND KUT-2
    9 IF(KOMPAR.EQ.LOWER(KUT))GO TO 10
      KUT=KUT-1
      IF(KOMPAR.EQ.LOWER(KUT))GO TO 10
      KUT=KUT-1
      IF(KOMPAR.NE.LOWER(KUT))GO TO 11
C     REPLACE LOWER CASE LETTER BY ITS UPPER CASE VERSION
   10 KOMPAR=KAPITL(KUT)
C     SAVE CHARACTER IN KONVRT ARRAY IF ROOM REMAINS
   11 IF(KNTKNV.GE.10)GO TO 12
      KNTKNV=KNTKNV+1
      KONVRT(KNTKNV)=KOMPAR
C
C     DETERMINE IF LETTER IN BUFFER MATCHES DICTIONARY.
C     STATEMENT 12 TESTS A NEWLY CONVERTED CHARACTER  AND
C     STATEMENT 13 TESTS A CHARACTER SAVED IN KONVRT ARRAY.
   12 IF(KOMPAR.EQ.IWORD(NXTCMP))GO TO 19
      GO TO 14
   13 IF(KONVRT(NOWSAM).EQ.IWORD(NXTCMP))GO TO 19
C
C     CHARACTER IN BUFFER DOES NOT MATCH (STATEMENT 14) OR IS
C     A SPACE OR TAB (STATEMENT 15).  KEND IS NONZERO ONLY IF
C     PREVIOUS COMPARISON MATCHED.   IF KEND IS  ZERO,  THE
C     COMPARISON WITH THIS WORD  ENDS  AT  STATEMENT  23.
C     OTHERWISE SPACES AND TABS IN THE BUFFER ARE PASSED BY
C     (STATEMENT 16)  AND REMAINING LETTERS  IN  DICTIONARY
C     WORD ARE SKIPPED UP TO ITS NEXT SPACE  (STATEMENT 18)
C     SO THAT NEXT SECTION OF THE WORD CAN BE COMPARED.  ANY
C     LETTER SKIPPED MEANS MATCH IS NOT EXACT  (JEXACT=0).
C     IF WORD CONTAINS NO FURTHER SPACE,  THEN  COMPARISON
C     ENDS AT STATEMENT 24.
   14 IF(KEND.LE.0)GO TO 23
      GO TO 17
   15 IF(KEND.LE.0)GO TO 23
   16 NXTBFR=NXTBFR+1
      IF(NXTBFR.GT.LMTBFR)GO TO 22
      IF(IBUFFR(NXTBFR).EQ.IBLANK)GO TO 16
      IF(IBUFFR(NXTBFR).EQ.ITAB)GO TO 16
   17 KEND=0
   18 IF(IWORD(NXTCMP).EQ.IBLANK)GO TO 20
      JEXACT=0
      NXTCMP=NXTCMP+1
      IF(NXTCMP.LT.IEND)GO TO 18
      GO TO 24
C
C     LETTER MATCHES.   NEWBFR REMEMBERS  WHERE  IN  BUFFER
C     THE MOST RECENT MATCHING LETTER WAS FOUND.   STATEMENT
C     20 IS ALSO USED TO STEP OVER A SPACE IN THE DICTIONARY
C     WORD WITHOUT COUNTING IT AS A MATCHED LETTER.
   19 NOWSAM=NOWSAM+1
      NEWBFR=NXTBFR
      KEND=JEND
      NXTBFR=NXTBFR+1
   20 NXTCMP=NXTCMP+1
   21 IF(NXTCMP.LT.IEND)GO TO 5
      GO TO 24
C
C     WORD CANNOT EXTEND FURTHER TO RIGHT
C     STATEMENT 22 IS REACHED WHEN  NXTBFR  PASSES  LMTBFR.
C     LMTBFR IS THEN SET  TO  LOCATION  OF  LAST  MATCHING
C     LETTER, WHICH ALSO LIMITS THE TESTING OF LATER WORDS.
   22 LMTBFR=NEWBFR
   23 JEXACT=0
C
C     COMPARE THIS WORD WITH THE BEST MATCH FOUND SO FAR
C     FEWER LETTERS MATCHED, THIS WORD IS IGNORED.
C     MORE LETTERS MATCHED, THIS WORD BECOMES BEST MATCH.
C     SAME NUMBER OF LETTERS MATCHED
C       BEST NOT EXACT, THIS NOT EXACT, MARKED AMBIGUOUS
C                       (OR NOTHING HAS YET BEEN MATCHED)
C       BEST NOT EXACT, THIS EXACT,     THIS BECOMES BEST
C       BEST EXACT,     THIS EXACT,     MARKED AMBIGUOUS
C       BEST EXACT,     THIS NOT EXACT, THIS IS IGNORED
C     WHEN MARKED AMBIGUOUS (GO TO 3), MATCH, LCNKNT AND
C     LCNWRD STILL DESCRIBE THE EARLIER WORD.
   24 IF(NOWSAM.LT.MSTSAM)GO TO 4
      IF(NOWSAM.GT.MSTSAM)GO TO 26
      IF(IEXACT.GE.0)GO TO 25
      IF(JEXACT.LT.0)GO TO 3
      GO TO 4
   25 IF(JEXACT.GE.0)GO TO 3
C
C     RECORD THIS WORD AS THE BEST MATCH.  LCNWRD TEMPORARILY
C     HOLDS IEND (LOCATION BEYOND END OF WORD) AND IS CHANGED
C     TO POINT TO START OF WORD AFTER STATEMENT 28.
   26 IEXACT=JEXACT
      MSTSAM=NOWSAM
      MATCH=INDEX
      LSTBFR=NEWBFR+1
      LCNKNT=KNTWRD
      LCNWRD=IEND
      GO TO 4
C
C     ZERO OR NEGATIVE LENGTH, SKIP -JEND LETTERS IN IWORD
   27 IEND=IEND-JEND
      GO TO 4
C
C     ENTIRE DICTIONARY HAS BEEN SEARCHED
C     IF ANY LETTERS WERE MATCHED,  LOWBFR IS MOVED  BEYOND
C     THE LAST MATCHING LETTER,  KIND BECOMES 3, 4 OR 5 FOR
C     IEXACT OF -1, 0 OR 1, AND LENGTH OF MATCHED WORD (LESS
C     THE 100 FLAG IF PRESENT) IS SUBTRACTED FROM LCNWRD TO
C     GIVE THE LOCATION OF ITS FIRST LETTER.
   28 IF(MSTSAM.LE.1)GO TO 30
      LOWBFR=LSTBFR
      KIND=4+IEXACT
      JEND=KNTLTR(LCNKNT)
      IF(JEND.GE.100)JEND=JEND-100
      LCNWRD=LCNWRD-JEND
      GO TO 31
C
C     NO PRINTING CHARACTERS WERE FOUND TO BE IDENTIFIED
   29 KIND=1
      MATCH=-1
      GO TO 31
C
C     NOT EVEN A PARTIAL MATCH COULD BE MADE
C     LOWBFR IS LEFT POINTING TO THE PRINTING CHARACTER
   30 KIND=2
      MATCH=0
C
C     RETURN TO CALLING PROGRAM
   31 RETURN
C
C     IEXACT = -1, EXACT MATCH FOUND BUT MUST CHECK THAT
C              A LONGER MATCH CANNOT BE FOUND WITH ANOTHER
C              WORD (FOR EXAMPLE, IF IWORD ARRAY CONTAINS
C              BOTH OF THE WORDS NO AND NONE, THEN THE
C              BUFFER CONTENTS "NON" WOULD MATCH WORD NO
C              EXACTLY, BUT THE PARTIAL MATCH WITH WORD
C              NONE WOULD BE BETTER)
C            = 0, A PARTIAL MATCH HAS BEEN FOUND
C            = 1, NO MATCH FOUND OR DUPLICATE PARTIAL
C     JEXACT = -1, WORD BEING TESTED HAS  MATCHED  EXACTLY
C              SO FAR
C            = 0, WORD BEING TESTED CAN ONLY BE A PARTIAL
C              MATCH
C     MSTSAM = 1 + MAXIMUM NUMBER OF LETTERS MATCHED
C     NOWSAM = 1 + NUMBER OF LETTERS MATCHING CURRENT WORD
C     JEND   = LENGTH OF CURRENT WORD FROM KNTLTR ARRAY
C              AFTER REMOVAL OF THE 100 FLAG
C     KEND   = NONZERO IF PREVIOUS CHARACTER COMPARED WAS
C              A MATCHING LETTER, ZERO AT START OF WORD OR
C              AFTER REST OF A SECTION HAS BEEN SKIPPED
C     IEND   = SUBSCRIPT OF IWORD LOCATION FOLLOWING FINAL
C              LETTER OF CURRENT WORD
C     NXTCMP = SUBSCRIPT OF IWORD LOCATION NEXT COMPARED
C     NXTBFR = SUBSCRIPT OF IBUFFR LOCATION NEXT COMPARED
C     NEWBFR = SUBSCRIPT OF IBUFFR LOCATION CONTAINING MOST
C              RECENT MATCHING LETTER
C     LSTBFR = NEWBFR+1 FOR THE BEST MATCH, RETURNED AS
C              LOWBFR IF A WORD IS FOUND
C     LMTBFR = MAXIMUM SUBSCRIPT OF IBUFFR WHICH IS TESTED
C     KNTKNV = NUMBER OF CHARACTERS SAVED IN KONVRT ARRAY
C     KNTWRD = SUBSCRIPT OF KNTLTR ENTRY BEING TESTED
C     INDEX  = SEQUENCE NUMBER OF WORD BEING TESTED
C864241272470abcdefghijklmnopqrstuvwxyz
      END