\ UNIQ PROGRAM, BY TOM ALMY.

\ THIS PROGRAM IS COPYRIGHT (C) 1985 BY TOM ALMY,
\ ALL RIGHTS RESERVED.
\ Permission is granted to registered users of ForthCMP to sell or distribute
\ computer programs incorporating the compiled contents of this file.

\ Based on the UNIX (TM Bell Labs) "uniq" program

\ DATA STORAGE
100 MSDOS
HEX 4000 DECIMAL CONSTANT BUFSIZ
INCLUDE FILTER

VARIABLE RAW-LINE   256 ALLOT   ( before preprocessing )
VARIABLE LAST-RAW-LINE 256 ALLOT ( last before preproc. )
VARIABLE LAST-LINE  256 ALLOT   ( first byte is length )
VARIABLE THIS-LINE  256 ALLOT   ( first byte is length )
VARIABLE UFLAG   ( Options )
VARIABLE DFLAG
VARIABLE CFLAG
VARIABLE SKIPCOLUMNS
VARIABLE SKIPFIELDS
VARIABLE COUNTER       ( repetitions of a line )

\ MESSAGES
0 0 IN/OUT 
: NOTICE  
   ." UNIQ PROGRAM " CR
   ." COPYRIGHT (C) 1985 BY THOMAS ALMY " CR ;

0 0 IN/OUT 
: USAGE   CONSOLE CR
 ." USAGE:  UNIQ [-options] [infile] [outfile]" CR
 ." To specify outfile without infile, give `-' for infile" CR
 ." Options are:" CR
 ."  U output non-repeated lines" CR
 ."  D output one copy of repeated lines"  CR
 ."  C give output report instead"  CR
 ."  no specification is same as `-UD'" CR
 ."  +n -- skip n fields" CR
 ."  -n  -- skip n characters  (after fields)" CR
 ABORT
  ;

\ GET OPTION ARGUMENTS

1 2 IN/OUT
: GETNUMBER ( pointerToFirstChar -- PointerAfterEnd Value )
	1- 0. ROT CONVERT -ROT DROP ;

2 1 IN/OUT 
: GET-MINUS-ARGS  ( string character -- string' )
    DUP [CHAR] a >= OVER [CHAR] z <= AND IF BL - THEN
    CASE
       [CHAR] - OF ( IGNORE ) ENDOF
       [CHAR] U OF UFLAG ON ENDOF
       [CHAR] D OF DFLAG ON ENDOF
       [CHAR] C OF CFLAG ON ENDOF
       DUP [CHAR] 9 <= OVER [CHAR] 0 >= AND IF
		SWAP 1- GETNUMBER SKIPCOLUMNS ! SWAP
	ELSE
		CONSOLE ." UNKNOWN OPTION " EMIT USAGE 
	THEN
       ENDCASE   ;

0 0 IN/OUT 
: GET-ARGS OPTIONSTRING CELL+ @ ( address )
  BEGIN  
	DUP OPTIONSTRING CELL+ @ - OPTIONSTRING @ < 
  WHILE ( continue while args )
	COUNT DUP [CHAR] + = IF
		DROP GETNUMBER SKIPFIELDS ! 
	ELSE
		GET-MINUS-ARGS
	THEN
  REPEAT
  DROP
  UFLAG @ DFLAG @ CFLAG @ OR OR 0= IF ( dc&u not specified )
  	UFLAG ON DFLAG ON THEN ;


\ GET A LINE
PRIMITIVE
: INDEX ( addr len index -- addr' len' )
   TUCK - 0 MAX  ( addr index len' )
   -ROT + SWAP ;

2 2 IN/OUT
: SKIP-FIELD ( addr len -- addr' len' )
     BL SCAN BL SKIP ;

2 2 IN/OUT
: ?SKIP-COLUMNS ( addr len -- addr' len' )
    SKIPCOLUMNS @ ?DUP IF  INDEX THEN ;

2 2 IN/OUT
: ?SKIP-FIELDS  ( addr len -- addr' len' )
    SKIPFIELDS @ 0 ?DO SKIP-FIELD LOOP ;

0 1 IN/OUT
: GET-LINE? ( -- successflag )
   RAW-LINE 1+ 255 ACCEPT ( get that line )
   DUP 0< IF DROP 0 EXIT THEN  ( EOF reached --> FAILED )
    RAW-LINE C! ( store length of raw line )
   RAW-LINE COUNT ?SKIP-FIELDS ?SKIP-COLUMNS
   DUP THIS-LINE C!
    THIS-LINE 1+ SWAP CMOVE ( move preprocessed line into place)
   -1 ( success! )  ;

\ PERFORM-UNIQ AND HELP FUNCTIONS
0 0 IN/OUT
: MAKE-IT-LAST
  THIS-LINE DUP C@ 1+ LAST-LINE SWAP  CMOVE
  RAW-LINE DUP C@ 1+ LAST-RAW-LINE SWAP CMOVE ;

0 1 IN/OUT
: LINES-SAME?  ( -- equalflag )
  THIS-LINE COUNT LAST-LINE COUNT
   ROT OVER = IF S= ELSE 2DROP DROP 0 THEN ;

0 0 IN/OUT
: SPIT-LINE
    LAST-RAW-LINE COUNT TYPE CR ;

0 0 IN/OUT
: REPORT-LINE
    COUNTER @ 1+ 4 .R  2 SPACES  SPIT-LINE ;

0 0 IN/OUT
: THE-SAME
     COUNTER @ 0= IF  DFLAG @ IF SPIT-LINE THEN THEN
     1 COUNTER +! ;

0 0 IN/OUT
: NOT-SAME
   CFLAG @ IF REPORT-LINE  COUNTER OFF
           ELSE COUNTER @ IF  COUNTER OFF  ELSE
                            UFLAG @ IF SPIT-LINE THEN
                          THEN
           THEN
   MAKE-IT-LAST ;

0 0 IN/OUT
: PERFORM-UNIQ
    GET-LINE? 0= IF EXIT THEN   MAKE-IT-LAST
    COUNTER OFF
    BEGIN  GET-LINE? WHILE
    LINES-SAME? IF THE-SAME  ELSE NOT-SAME THEN
    REPEAT
    NOT-SAME
;

\ MAIN PROGRAM
: MAIN    
  SETBUFS
  NOTICE
  SETFILES IF USAGE THEN
  GET-ARGS
  PERFORM-UNIQ
  BYE ;

INCLUDE DOS2
INCLUDE FORTHLIB
END

