Trailing-Edge
-
PDP-10 Archives
-
SRI_NIC_PERM_SRC_3_19910112
-
mm-dom/fscopy.mac
There are 3 other files named fscopy.mac in the archive. Click here to see a list.
TITLE FSCOPY - Fast String Copy
SUBTTL Courtesy of KLH
SEARCH MACSYM
;;;AC's
A=:1 ;Source BP
B=:2 ;Temp
C=:3 ;Byte count
D=:4 ;Local AC's follow
E=:5
T=:6
U=:7
V=:10
W=:11
L=:12
M=:13
N=:14
O=:15 ;Destination BP
KLWINC==^D18 ;# chars at which hairy word move starts
; winning over BP loop
$STENT==1 ;offset from beg of loop for entry to
; STORE phase
$GENT==4 ;offset from beg of loop for entry to GET phase
.PSECT DATA
FSCPKL: 0 ;a LSHC A,<n> for SHIFT-IN goes here
LSH A,1
MOVEM A,(C) ;Address of dest stored in RH here
0 ;a LSHC A,<n> for SHIFT-OUT goes here
MOVE B,(C) ;Address of source stored in RH here
AOBJN C,FSCPKL
JRST @FENTRM(D)
.ENDPS
.PSECT CODE
FSCOPY::CAIL C,KLWINC ;Less than break-even point?
IFSKP.
DO. ;Yes, can't win
ILDB B,A ;Simple byte-by-byte copying.
IDPB B,O
SOJG C,TOP.
ENDDO.
RET
ENDIF.
;;;Wheee, using hairy word copying!
STKVAR <<TMPACS,20>>
HRLI B,D ;Save AC's D-O
HRRI B,D+TMPACS
BLT B,O+TMPACS
LDB E,[POINT 3,A,5] ;Get low 3 bits of P field for source
SKIPGE E,FSCHTB(E) ;Get resulting # chars, skip if addr ok
MOVEI A,1(A) ;P= 01, must bump address.
MOVEI L,1(A) ;Anyway, get addr+1 into 12.
LDB D,[POINT 3,O,5] ;Repeat procedure for dest
SKIPGE D,FSCHT2(D) ;Using slightly different table
AOSA V,O ;And addr goes into 10
MOVEI V,(O) ;And isn't normally bumped.
MOVEI O,(C) ;Update the destination pointer in o
ADJBP O,O+TMPACS ;From initial value
;Now get index for shift values, and count for words
SUBI C,(E) ;Get # chars minus those in 1st src wd.
ADDI E,-6(D) ;Get E index - D*5+S, zero based.
IDIVI C,5 ;Find # words to loop through, rem in d
MOVE B,-1(L) ;And get 1st word of source.
JRST @FPATH(E) ;Must now pick a path...
;Blt possible! Jump to FSBLT0 if no shifting needed for setup.
FSBLT0: MOVEM B,(V) ;Store source word directly
JRST FSBLT4
FSBLT: LSH B,@SHASL(E) ;Shift source up against left
MOVE A,(V) ;Get 1st wd of dest.
LSH A,@SHADR(E) ;Right-adjust it
LSHC A,@SHFIX(E) ;And get everything into A.
LSH A,1 ;Need one more bit's worth.
MOVEM A,(V) ;Store 1st wd of dest...
;Now settle down to serious BLT'ing.
FSBLT4: MOVEI T,(C) ;Transfer word count
ADDI T,(V) ;Find addr of last dest word
MOVEI V,1(V) ;Now get 1st dest addr,
HRLI V,(L) ;And put 1st source addr in LH.
BLT V,(T) ;Zoom!!
JUMPE D,FSCPY9 ;If no remainder, super win - done!
ADDI L,(C) ;Hmm, must get last source word.
MOVE B,(L) ;Like so.
MOVE A,FBMSK(D) ;And a word mask for chars
AND B,A ;Clear unused bits from source,
ANDCAM A,1(T) ;And zap target bits in dest.
IORM B,1(T) ;And stick last chars in.
JRST FSCPY9 ;Ok, all done...
;Can't do BLT. Well, get A and B set up for magical shift loop.
SHSKP2: LSH B,@SHASL(E) ;Here, only need to adjust source,
JRST SHSKP5 ;Since dest will be totally clobbered.
FSSHFT: LSH B,@SHASL(E) ;Both src and dest must be integrated.
SHSKP1: MOVE A,(V) ;Only need adjust dest; src wd is full.
LSH A,@SHADR(E)
SHSKP5: LSHC A,@SHFIX(E) ;Stuff as many chars as possible into A
CAIE D,0 ;If any remainder,
MOVEI C,1(C) ;Add 1 more word.
MOVNI C,(C) ;Make AOBJN pointer.
MOVSI C,(C)
;Now make another index for termination wrapup purposes.
ADD D,FFINDX(E) ;New idx using # chs left in last wd.
;Now set things up for loop, and enter it.
FSCPY3: HRLI V,(<MOVEM A,(C)>)
MOVEM V,FSCPKL+2 ;Address for MOVEM
HRRM L,FSCPKL+4 ;Address for MOVE
MOVE L,FSHINT(E) ;Get LSH for shift-in
MOVEM L,FSCPKL
MOVE L,FSHOUT(E) ;And shift-out
MOVEM L,FSCPKL+3
JUMPGE D,FSCPKL+$STENT ;Depending on flag, enter loop at store
SOS V,FSCPKL+2
JRST FSCPKL+$GENT ;Or at get.
;---------------------------------------------------------------------------
;Come here when loop finished. The last word of the source string
;will be in B. It may have 1 to 5 chars left for moving, but will
;never have 0.
;Long wrapup.
FSCPTL: LSHC A,@FSCPKL ;Perform a shift-in
LSH A,1
MOVEM A,@10 ;Store full word.
MOVEI C,1(C) ;Increment address index
;And drop through to Medium wrapup.
;Medium wrapup.
FSCPTM: LSHC A,@FLOUT(D) ;Shift rest of source word into A
MOVE B,@10 ;Get dest word it will be stored into
LSH B,@FLADJ(D) ;Left-adjust chars to preserve.
;And drop thru to Short wrapup.
;Short wrapup.
FSCPTS: LSHC A,@FFLOUT(D) ;Do final, last, shift-out.
ANDCMI A,1
MOVEM A,@10 ;And store last dest word.
;Done!! Just restore regs and return.
FSCPY9: HRLI N,D+TMPACS ;Restore regs
HRRI N,D
BLT N,N
RET
;Indexed by low 3 bits of P field, returns # chars
;existing to right of loc BP points to. Hence value
;ranges from 5 to 1; if P = 01, SETZ indicates that
;bp address needs incrementing.
FSCHTB: 1 ;P=10
SETZ 5 ;P=01, increment addr
0
0 ;Randomness
5 ;P=44, full word
4 ;P=35, 4 chars to go
3 ;P=26
2 ;P=17
;This table is just like FSCHTB except values are pre-multiplied
;by 5 for easy addition into E.
FSCHT2: 1*5 ;P=10
SETZ 5*5 ;P=01, increment addr
0
0 ;Random
5*5
4*5
3*5
2*5
;This table is indexed by D when it has # chars remaining from
;dividing # chars (in C) by 5. Provides mask for these chars.
FBMSK: 0 ;Nothing here.
BYTE (7) 177
BYTE (7) 177, 177
BYTE (7) 177, 177, 177
BYTE (7) 177, 177, 177, 177
;FPATH table vectors off to BLT and other minor stuff as
;soon as all the basic computations are made.
;Indexed by E.
FPATH: FSBLT
FSSHFT
FSSHFT
FSSHFT
SHSKP1
FSSHFT
FSBLT
FSSHFT
FSSHFT
SHSKP1
FSSHFT
FSSHFT
FSBLT
FSSHFT
SHSKP1
FSSHFT
FSSHFT
FSSHFT
FSBLT
SHSKP1
SHSKP2
SHSKP2
SHSKP2
SHSKP2
FSBLT0
DEFINE ENT (A,B,C,D,E) <
A*7
B*7
C*7
D*7
E*7
>;DEFINE ENT
;SHASL table, contains # bits to shift first source wd left so
;as to left-adjust it in B. Indexed by E.
SHASL: ENT 4,3,2,1,0
ENT 4,3,2,1,0
ENT 4,3,2,1,0
ENT 4,3,2,1,0
ENT 4,3,2,1,0
;SHADR table, contains # bits to shift first dest wd right so
;as to right-adjust it in A. Indexed by E.
DEFINE ENT1 (A,B,C,D,E) <
0,,A*7-1
0,,B*7-1
0,,C*7-1
0,,D*7-1
0,,E*7-1
>;DEFINE ENT1
SHADR: ENT1 -1,-1,-1,-1,-1
ENT1 -2,-2,-2,-2,-2
ENT1 -3,-3,-3,-3,-3
ENT1 -4,-4,-4,-4,-4
ENT1 -5,-5,-5,-5,-5
;SHFIX table, contains # bits to left-shift A and B combined so
;as to move as many characters out of B as possible. Indexed
;by E. MIN(d,e) (d and e after fschtb)
SHFIX: ENT 1,1,1,1,1
ENT 1,2,2,2,2
ENT 1,2,3,3,3
ENT 1,2,3,4,4
ENT 1,2,3,4,5
;FSHINT table, containing appropriate LSHC instructions for shifting
;in the first chars of a fresh source word. Indexed by E.
DEFINE ENTL (ARG1,ARG2,ARG3,ARG4,ARG5) <
LSHC A,ARG1*7
LSHC A,ARG2*7
LSHC A,ARG3*7
LSHC A,ARG4*7
LSHC A,ARG5*7
>;DEFINE ENTL
FSHINT: ENTL 5,4,3,2,1
ENTL 1,5,4,3,2
ENTL 2,1,5,4,3
ENTL 3,2,1,5,4
ENTL 4,3,2,1,5
;FSHOUT table, containing appropriate LSHC instructions for shifting
;out the last chars of an old source word, to make room for a
;new one. Indexed by E.
FSHOUT: ENTL 0,1,2,3,4
ENTL 4,0,1,2,3
ENTL 3,4,0,1,2
ENTL 2,3,4,0,1
ENTL 1,2,3,4,0
;FFINDX table, contains part of D index for fast add-in.
;Indexed by E. Similar to FSHOUT. Sign bit also indicates
;whether entry point is $STENT (pos) or $GENT (neg).
DEFINE ENTS (A,B,C,D,E) <
ENT5 A
ENT5 B
ENT5 C
ENT5 D
ENT5 E
>;DEFINE ENTS
DEFINE ENT5 (X,Y) <
X+<Y*5>
>;DEFINE ENT5
S==0B0
G==1B0
FFINDX: ENTS (<S,0>,<S,1>,<S,2>,<S,3>,<S,4>)
ENTS (<G,4>,<S,0>,<S,1>,<S,2>,<S,3>)
ENTS (<G,3>,<G,4>,<S,0>,<S,1>,<S,2>)
ENTS (<G,2>,<G,3>,<G,4>,<S,0>,<S,1>)
ENTS (<G,1>,<G,2>,<G,3>,<G,4>,<S,0>)
;Last item (5) is actually first (0)
DEFINE ENTX (A,B,C,D,E) <
7*E
7*A
7*B
7*C
7*D
>;DEFINE ENTX
;FENTRM table, dispatching to appropriate wrapup routine when fast AC
;loop is finished. Indexed by D.
FENTRM:
DEFINE ENTXJ (A,B,C,D,E) <
FSCPT'E
FSCPT'A
FSCPT'B
FSCPT'C
FSCPT'D
>;DEFINE ENTXJ
ENTXJ M,M,M,M,S
ENTXJ M,M,M,S,L
ENTXJ M,M,S,L,L
ENTXJ M,S,L,L,L
ENTXJ S,L,L,L,L
;FLOUT table, for use by Medium wrapup routine;pushes out remaining
;source chars in B, making room for incoming dest word.
;Indexed by D.
FLOUT: ENTX 1,2,3,4,0
ENTX 1,2,3,0,1
ENTX 1,2,0,1,2
ENTX 1,0,1,2,3
ENTX 0,1,2,3,4
;FLADJ table, also for Medium wrapup routine;adjusts dest word in
;B to left-adjust chars to be preserved.
FLADJ: ENTX 1,2,3,4,5
ENTX 2,3,4,5,1
ENTX 3,4,5,1,2
ENTX 4,5,1,2,3
ENTX 5,1,2,3,4
;FFLOUT table, for Short wrapup routine. Final Last shift-out of
;chars in B, so that the last dest word can be stored from A.
;Indexed by D. Adds 1 extra bit since MOVEM A, done right after it,
;and nothing to preserve in B.
FFLOUT:
DEFINE ENTX1 (A,B,C,D,E) <
E*7+1
A*7+1
B*7+1
C*7+1
D*7+1
>;DEFINE ENTX1
ENTX1 4,3,2,1,5
ENTX1 3,2,1,4,4
ENTX1 2,1,3,4,3
ENTX1 1,2,4,3,2
ENTX1 1,4,3,2,1
END