Get the text content of any file?

151 Views Asked by At

With Delphi 10 Seattle, I need to get the text content of any file.

I've tried with GetFileContentsFromIFilter in SPFilter.pas from http://develop.shorterpath.com/spfree/default.asp, but I could not get it to work:

uses SPFilter;

procedure TForm1.btnTestClick(Sender: TObject);
var
  FilterStream: TStringStream;
begin
  FilterStream := TStringStream.Create;
  try
    if SPFilter.GetFileContentsFromIFilter(Trim(edtFile.Text), FilterStream) then // D:\Readme.txt
    begin
      FilterStream.Seek(0, soFromBeginning);
      Memo1.Lines.LoadFromStream(FilterStream);
    end;
  finally
    FilterStream.Free;
  end;
end;

This is the SPFilter.pas unit:

(******************************************************************************)
(* SPFilter - Read file content using IFilter interface                       *)
(* Shorter Path Free Components 1.0                                           *)
(*                                                                            *)
(* Copyright (c) 2003 Shorter Path Software                                   *)
(* http://develop.shorterpath.com                                             *)
(******************************************************************************)

unit SPFilter;

interface

uses
  CodeSiteLogging,
  Classes;

function GetFileContentsFromIFilter(const FileName: string; OutData: TStream): Boolean;

implementation

uses
  Windows, SysUtils, Registry, ActiveX, Filter;

function GetFileContentsFromIFilter(const FileName: string;
  OutData: TStream): Boolean;
var
  Reg: TRegistry;
  DocType, DocClass, HandlerClass, PersistClass, FilterDLL: string;
  DLLHandle: THandle;
  ClassFactory: IClassFactory;
  FilterObj: IFilter;
  PersistFile: IPersistFile;
  DllGetClassObject: TDllGetClassObject;
  DllCanUnloadNow: TDLLCanUnloadNow;
  Res, ChunkRes: HResult;
  pFlags: ULONG;
  WFileName: WideString;
  StatChunk: TStatChunk;
  cwcBuffer: ULONG;
  awcBuffer: PWideChar;
  Txt: WideString;
  AnsiTxt: string;
  t: string;
  EndOfChunksCount: Integer;
begin
  Result := False;

  { Find filter DLL }
  FilterDLL := EmptyStr;

  { Step 1: Determine the CLSID }
  Reg := TRegistry.Create(KEY_READ);
  Reg.RootKey := HKEY_LOCAL_MACHINE;

  { A. Locate document type }
  DocType := 'Software\Classes\' + ExtractFileExt(FileName);
  if Reg.OpenKey(DocType, False) then
  begin
    DocType := Reg.ReadString(EmptyStr);
    Reg.CloseKey;
    CodeSite.Send('document type', DocType); // -> txtfile

    if Length(DocType) > 0 then
    begin
      { B. Locate document class }
      if Reg.OpenKey('Software\Classes\' + DocType + '\CLSID', False) then // FALSE HERE!!
      begin
        CodeSite.Send('CLSID');
        DocClass := Reg.ReadString(EmptyStr);
        Reg.CloseKey;
      end;
    end;

    { Step 2: Determine the Persistent Handler }
    if Length(DocClass) > 0 then
    begin
      if Reg.OpenKey('Software\Classes\CLSID\' + DocClass + '\PersistentHandler', False) then
      begin
        HandlerClass := Reg.ReadString(EmptyStr);
        Reg.CloseKey;
      end;
    end;

    { Step 3: Determine the IFilter Persistent Handler GUID }
    if Length(HandlerClass) > 0 then
    begin
      if Reg.OpenKey('Software\Classes\CLSID\' + HandlerClass +
        '\PersistentAddinsRegistered\' + GUIDToString(IID_IFilter), False) then
      begin
        PersistClass := Reg.ReadString(EmptyStr);
        Reg.CloseKey;
      end;
    end;

    { Step 4: Determine the Filter DLL }
    if Length(PersistClass) > 0 then
    begin
      if Reg.OpenKey('Software\Classes\CLSID\' + PersistClass + '\InprocServer32', False) then
      begin
        FilterDLL := Reg.ReadString(EmptyStr);
        Reg.CloseKey;
      end;
    end;
  end;
  Reg.Free;

  { Use Filter DLL to read the file }
  if Length(FilterDLL) > 0 then
  begin
    DLLHandle := LoadLibrary(PChar(FilterDLL));
    if DLLHandle <> 0 then
    begin
      @DllGetClassObject := GetProcAddress(DLLHandle, 'DllGetClassObject');
      @DllCanUnloadNow := GetProcAddress(DLLHandle, 'DllCanUnloadNow');
      if Assigned(DllGetClassObject) then
      begin
        { Get Class Factory }
        DllGetClassObject(StringToGUID(PersistClass), IClassFactory, ClassFactory);
        if Assigned(ClassFactory) then
        begin
          { Get IFilter object }
          ClassFactory.CreateInstance(nil, IFilter, FilterObj);
          if Assigned(FilterObj) then
          begin
            FilterObj.QueryInterface(IPersistFile, PersistFile);
            if Assigned(PersistFile) then
            begin
              WFileName := FileName;
              PersistFile.Load(PWideChar(WFileName), 0);
              EndOfChunksCount := 0;
              Res := FilterObj.Init(0, 0, nil, pFlags);
              if Res = S_OK then
              //if FilterObj.Init(0, 0, nil, pFlags) = S_OK then
              begin
                repeat
                  ChunkRes := FilterObj.GetChunk(StatChunk);
                  if ChunkRes = S_OK then
                  begin
                    if (StatChunk.flags and CHUNK_TEXT) <> 0 then
                    begin
                      GetMem(awcBuffer, 16384*2);
                      repeat
                        cwcBuffer := 16384;
                        FillChar(awcBuffer^, cwcBuffer, 0);
                        Res := FilterObj.GetText(cwcBuffer, awcBuffer);
                        if cwcBuffer > 0 then
                        begin
                          SetLength(Txt, cwcBuffer*2);
                          FillChar(PWideChar(Txt)^, cwcBuffer*2, 0);
                          Move(awcBuffer^, PWideChar(Txt)^, cwcBuffer*2);

                          SetLength(AnsiTxt, cwcBuffer*2);
                          FillChar(PChar(AnsiTxt)^, cwcBuffer*2, 0);
                          WideCharToMultiByte(CP_ACP, 0, PWideChar(Txt),
                            cwcBuffer, PAnsiChar(AnsiTxt), cwcBuffer*2, ' ', nil);

                          SetLength(AnsiTxt, StrLen(PChar(AnsiTxt)));
                          t := t + AnsiTxt;
                        end;
                      until Res = FILTER_E_NO_MORE_TEXT;
                      FreeMem(awcBuffer);
                    end;
                  end;
                  if ChunkRes = FILTER_E_END_OF_CHUNKS then
                    Inc(EndOfChunksCount) else
                    EndOfChunksCount := 0;
                until EndOfChunksCount > 1;
                { Return True for success }
                Result := True;
              end;
            end;
            { Release filter }
            PersistFile := nil;
            FilterObj := nil;
          end;
          { Release Class Factory }
          ClassFactory := nil;
        end;
      end;
      if Assigned(DllCanUnloadNow) then
      begin
        if DllCanUnloadNow = S_OK then
          FreeLibrary(DLLHandle) else
      end else
        FreeLibrary(DLLHandle);
    end;
  end;

  { Write data to stream }
  if Result then
  try
    OutData.Write(t[1], Length(t));
  except
    Result := False;
  end;
end;

end.

As you can see from the comment "FALSE HERE" it fails to get the CLSID of .TXT file. However, there is no CLSID key inside the txtfile registry key. So what is wrong here?

IFilter Explorer from Citeknet however does show me that there IS a valid iFilter for .TXT files!

Does anybody know how to get the content of any file?

0

There are 0 best solutions below