//============================================================================== // Unit: Parser // // Purpose: Declare abstract Parser class and three concrete derived parsers: // * TDelimiterParser tokenizes the string using a set of delimiters // * TColumnarParser extracts ranges of columns as tokens // * TPatternParser extracts tokens embedded in "static" patterned // text. // // Copyright: 1997, Palladion Software //============================================================================== unit Parser; interface uses Classes, SysUtils; type //************************************************************************ // Type: EParseError // // Purpose: Exception in parsing string. // //************************************************************************ EParseError = class( Exception ); //************************************************************************ // Type: TParseExecute // // Purpose: Method pointer for handling parsed tokens. // //************************************************************************ TParseEvent = procedure( Sender : TObject; Tokens : TStringList ) of object; //************************************************************************ // Type: TParseStatus // // Purpose: Current status of parser. // //************************************************************************ TParseStatus = ( parseInit, parseOk, parseFail ); //************************************************************************ // Type: TParser // // Purpose: Abstract interface class for parser hierarchy. // //************************************************************************ TParser = class( TObject ) protected FOnParse : TParseEvent; FStatus : TParseStatus; FTokens : TStringList; FToParse : String; procedure BeginParse( const strToParse : String ); function DoParse( const strToParse : String ) : Boolean; virtual; abstract; function GetTokens : TStringList; public constructor Create; destructor Destroy; override; property Status : TParseStatus read FStatus write FStatus; property ToParse : String read FToParse write BeginParse; property Tokens : TStringList read GetTokens; function PeekToken : String; function PopToken : String; //published property OnParse : TParseEvent read FOnParse write FOnParse; end; //************************************************************************ // Type: TDelimiterParser // // Purpose: Parse a string into tokens, breaking at any character in // our delimiter string. // //************************************************************************ TDelimiterParser = class( TParser ) protected FDelimiters : String; function DoParse( const strToParse : String ) : Boolean; override; public constructor Create; destructor Destroy; override; //published property Delimiters : String read FDelimiters write FDelimiters; end; //************************************************************************ // Type: TColumnarParser // // Purpose: Parse a string into tokens by columns. // //************************************************************************ TColumnarParser = class( TParser ) protected FSpecifiers : TList; FStripChars : String; function DoParse( const strToParse : String ) : Boolean; override; procedure SetColumns( const strColumnDesc : String ); public constructor Create; destructor Destroy; override; //published property StripChars : String read FStripChars write FStripChars; property ColumnDesc : String write SetColumns; end; //************************************************************************ // Type: TPatternParser // // Purpose: Parse a string into tokens using our pattern string as a // template. *) // //************************************************************************ TPatternParser = class( TParser ) protected FPattern : String; FPlaceholder : String; FSpecifiers : TList; // list of TPatternSpecifiers procedure SetPattern( const strNewPattern : String ); function DoParse( const strToParse : String ) : Boolean; override; public constructor Create; destructor Destroy; override; //published property Pattern : String read FPattern write SetPattern; property Placeholder : String read FPlaceholder write FPlaceholder; end; implementation type //************************************************************************ // Type: TPatternSpecifier // // Purpose: Helper class for TPatternParser: parse a single token and // return new position. *) // //************************************************************************ TPatternSpecifier = class( TObject ) private FPrefix : String; FShouldParse : Boolean; // False if "trailer" specifier. FDelimiters : String; public property Prefix : String read FPrefix write FPrefix; property ShouldParse : Boolean read FShouldParse write FShouldParse; property Delimiters : String read FDelimiters write FDelimiters; // Scan strToParse, skipping text which matches FPrefix. Then // parse strToken using FDelimiters. Return the position within // the string with done; on error, retturn -1. function ParseToken( const strToParse : String; nStart : integer; var strToken : String ) : integer; end; //************************************************************************ // Type: TColumnSpecifier // // Purpose: Helper class for TColumnParser: parse a single token and // return true / false to indicate success. *) // //************************************************************************ TColumnSpecifier = class( TObject ) private FColStart : integer; FColWidth : integer; FStripChars : String; function GetColEnd : integer; procedure SetColEnd( nNewEnd : integer ); public property ColStart : integer read FColStart write FColStart; property ColWidth : integer read FColWidth write FColWidth; property ColEnd : integer read GetColEnd write SetColEnd; property StripChars : String read FStripChars write FStripChars; function ParseToken( const strToParse : String; var strToken : String ) : Boolean; end; //************************************************************************ // TParser Implementation //************************************************************************ constructor TParser.Create; begin inherited Create; FTokens := TStringList.Create; FStatus := parseInit; FOnParse := Nil; end; destructor TParser.Destroy; begin FTokens.Free; inherited Destroy; end; function TParser.GetTokens : TStringList; begin if FStatus = parseOk then begin Result := FTokens; end else begin raise EParseError.Create( 'TPatternParser.Tokens accessed before successful parse.' ); end; end; function TParser.PeekToken : String; var stlTokens : TStringList; begin stlTokens := GetTokens; if ( stlTokens.Count > 0 ) then begin Result := stlTokens[ 0 ]; end else begin Result := ''; end; end; function TParser.PopToken : String; var stlTokens : TStringList; begin stlTokens := GetTokens; if ( stlTokens.Count > 0 ) then begin Result := stlTokens[ 0 ]; stlTokens.Delete( 0 ); end else begin Result := ''; end; end; procedure TParser.BeginParse( const strToParse : String ); begin FToParse := strToParse; if ( FToParse = '' ) then begin FStatus := parseInit; end else begin if ( DoParse( strToParse ) ) then begin FStatus := parseOk; if Assigned( FOnParse ) then begin FOnParse( Self, FTokens ); end; end else begin FStatus := parseFail; end; end; end; //************************************************************************ // TDelimiterParser Implementation //************************************************************************ constructor TDelimiterParser.Create; begin inherited Create; end; destructor TDelimiterParser.Destroy; begin inherited Destroy; end; function TDelimiterParser.DoParse( const strToParse : String ) : Boolean; var i, nLast : integer; begin FTokens.Clear; nLast := 0; for i := 1 to Length( strToParse ) do begin if ( Pos( strToParse[i], FDelimiters ) > 0 ) then begin if ( i - nLast > 1 ) then // No empty tokens begin FTokens.Add( Copy( strToParse, nLast + 1, i - nLast - 1 ) ); end; nLast := i; end; end; if ( nLast < Length( strToParse ) ) then begin FTokens.Add( Copy( strToParse, nLast + 1, 999 ) ); end; Result := ( FTokens.Count > 0 ); end; //************************************************************************ // TPatternParser Implementation //************************************************************************ constructor TPatternParser.Create; begin inherited Create; FSpecifiers := TList.Create; end; destructor TPatternParser.Destroy; begin FSpecifiers.Free; inherited Destroy; end; function TPatternParser.DoParse( const strToParse : String ) : Boolean; var iSpecifier : integer; iCharPos : integer; strToken : String; specifier : TPatternSpecifier; begin FTokens.Clear; Result := False; iCharPos := 1; for iSpecifier := 0 to FSpecifiers.Count - 1 do begin specifier := FSpecifiers.Items[ iSpecifier ]; iCharPos := specifier.ParseToken( strToParse, iCharPos, strToken ); if( iCharPos > 0 ) then begin if specifier.ShouldParse then FTokens.Add( strToken ); end else begin exit; end; end; Result := True; end; procedure TPatternParser.SetPattern( const strNewPattern : String ); var iPrefix : integer; prefixes : TStringList; dp : TDelimiterParser; specifier : TPatternSpecifier; strTmp : String; begin FPattern := strNewPattern; FSpecifiers.Clear; dp := TDelimiterParser.Create; dp.Delimiters := FPlaceholder; dp.ToParse := strNewPattern; if ( dp.Status = parseOk ) then begin prefixes := dp.Tokens; for iPrefix := 0 to prefixes.Count - 1 do begin specifier := TPatternSpecifier.Create; specifier.Prefix := prefixes[ iPrefix ]; if ( iPrefix < prefixes.Count - 1 ) then // set delimiter begin specifier.Delimiters := prefixes[ iPrefix + 1 ][1]; specifier.ShouldParse := True; end else begin specifier.Delimiters := ''; strTmp := Copy( strNewPattern, Length( strNewPattern ) - Length( FPlaceHolder ) + 1, Length( FPlaceHolder ) ); specifier.ShouldParse := ( strTmp = FPlaceHolder ); end; FSpecifiers.Add( specifier ); end; end else begin specifier := TPatternSpecifier.Create; specifier.Prefix := strNewPattern; specifier.ShouldParse := False; specifier.Delimiters := ' '; FSpecifiers.Add( specifier ); end; dp.Free; end; //************************************************************************ // TPatternSpecifier Implementation //************************************************************************ function TPatternSpecifier.ParseToken( const strToParse : String; nStart : integer; var strToken : String ) : integer; var iPos, jPos : integer; begin iPos := nStart; // Test / strip prefix. for jPos := 1 to Length( FPrefix ) do begin if ( strToParse[iPos] <> FPrefix[jPos] ) then begin Result := -iPos; exit; end else begin iPos := iPos + 1; end; end; jPos := iPos; while FShouldParse and ( jPos <= Length( strToParse ) ) do begin if ( Pos( strToParse[jPos], FDelimiters ) > 0 ) then begin break; end else begin jPos := jPos + 1; end; end; // Assertion: jpos now points to next character past the parsed token, // or to iPos if no parsing done. if ( jPos > iPos ) then begin strToken := Copy( strToParse, iPos, jPos - iPos ); Result := jPos; end else begin if not FShouldParse then begin Result := iPos; strToken := ''; end else begin Result := -iPos; end; end; end; //************************************************************************ // TColumnarParser Implementation //************************************************************************ constructor TColumnarParser.Create; begin inherited Create; FSpecifiers := TList.Create; end; destructor TColumnarParser.Destroy; begin FSpecifiers.Free; inherited Destroy; end; function TColumnarParser.DoParse( const strToParse : String ) : Boolean; var iSpecifier : integer; strToken : String; specifier : TColumnSpecifier; begin FTokens.Clear; Result := False; for iSpecifier := 0 to FSpecifiers.Count - 1 do begin specifier := FSpecifiers.Items[ iSpecifier ]; // Empty tokens are possible! Not errors, for this parser. specifier.ParseToken( strToParse, strToken ); FTokens.Add( strToken ); end; Result := True; end; procedure TColumnarParser.SetColumns( const strColumnDesc : String ); var dp : TDelimiterParser; iToken : integer; nTokens : integer; specifier : TColumnSpecifier; begin FSpecifiers.Clear; if ( strColumnDesc = '' ) then begin FStatus := parseInit; exit; end; dp := TDelimiterParser.Create; dp.Delimiters := '(, );:'; dp.ToParse := strColumnDesc; if ( dp.Status = parseOk ) then begin iToken := 0; nTokens := dp.Tokens.Count; while ( iToken < nTokens ) do begin specifier := TColumnSpecifier.Create; specifier.StripChars := StripChars; if ( iToken < nTokens - 1 ) then // parse start-end pairs begin specifier.ColStart := StrToInt( dp.Tokens[ iToken ] ); specifier.ColEnd := StrToInt( dp.Tokens[ iToken + 1 ] ); iToken := iToken + 2; end else // parse trailing singleton begin specifier.ColStart := strToInt( dp.Tokens[ iToken ] ); specifier.ColEnd := 999; iToken := iToken + 1; end; FSpecifiers.Add( specifier ); end; FStatus := parseOk; end else begin FStatus := parseFail; end; end; //************************************************************************ // TColumnSpecifier Implementation //************************************************************************ function TColumnSpecifier.GetColEnd : integer; begin Result := FColStart + FColWidth - 1; end; procedure TColumnSpecifier.SetColEnd( nNewEnd : integer ); begin if( nNewEnd <= FColStart ) then raise EParseError.Create( 'TColumnSpecifier.SetColEnd() -- end <= start.' ); FColWidth := nNewEnd - FColStart + 1; end; function TColumnSpecifier.ParseToken( const strToParse : String; var strToken : String ) : Boolean; var iChar : integer; begin strToken := Copy( strToParse, FColStart, FColWidth ); for iChar := Length( strToken ) downto 1 do begin if ( Pos( strToken[iChar], FStripChars ) > 0 ) then Delete( strToken, iChar, 1 ); end; Result := ( Length( strToken ) > 0 ); end; end.