Code:
unit unt_EPGBuilder;
interface
uses
Winapi.Windows, Winapi.Messages, System.SysUtils, System.Variants, System.Classes,
Winapi.WinInet;
type
{ TEPGChannel class }
// Read-only description of one channel: the id used in the output, the
// display name and the id used by the scraped site. All three values are
// supplied once through the constructor.
TEPGChannel = class(TObject)
private
  FChannelID   : string;
  FChannelName : string;
  FSiteID      : string;
public
  // Stores the three identifiers as given; no validation is done.
  constructor Create(AChannelID, AChannelName, ASiteID: string); virtual;
published
  // Channel id (TEPGGrabber.LoadConfig fills this from the XMLTVID= field).
  property ChannelID   : string read FChannelID;
  // Display name (TEPGGrabber.LoadConfig fills this from DISPLAYNAME=).
  property ChannelName : string read FChannelName;
  // Site-specific id (TEPGGrabber.LoadConfig fills this from SITEID=).
  property SiteID      : string read FSiteID;
end;
{ TEPGChannel List class }
// Growable, index-addressable collection of TEPGChannel references backed
// by a dynamic array.
TEPGChannelList = class(TObject)
private
  FList : array of TEPGChannel;
  // Accessors behind the Items and Count properties.
  function  GetEPGChannel(Index: Integer) : TEPGChannel;
  procedure SetEPGChannel(Index: Integer; EPGChannel: TEPGChannel);
  function  GetCount : Integer;
public
  constructor Create; virtual;
  destructor  Destroy; override;
  // Appends AEPGChannel at the end of the list.
  procedure Add(AEPGChannel: TEPGChannel);
  // Zero-based access to the stored channels.
  property Items[Index: Integer]: TEPGChannel read GetEPGChannel write SetEPGChannel;
published
  // Number of stored channels.
  property Count : Integer read GetCount;
end;
{ TEPGChannel Thread class }
// Worker thread that scrapes one channel. Execute calls ScrapeChannel and
// then WriteXMLTV (through Synchronize). ScrapeChannel is virtual and empty
// in this base class; descendants are expected to fill FResult there.
TEPGChannelThread = class(TThread)
private
  FDays        : Integer;
  FChannelID   : string;
  FChannelName : string;
  FSiteID      : string;
  FUserAgent   : string;
  FLang        : string;
  FTimeShift   : Integer;
  FOutput      : TStrings;
  // Appends FResult to FOutput when an output list has been assigned.
  procedure WriteXMLTV;
protected
  // Scraping hook; does nothing in the base class.
  procedure ScrapeChannel; virtual;
  procedure Execute; override;
public
  // Lines produced for this channel; created in the constructors and
  // released in Destroy.
  FResult : TStringList;
  constructor Create; overload;
  constructor Create(CreateSuspended: Boolean); overload;
  destructor  Destroy; override;
  // Downloads AURL through WinInet and returns the body as a string
  // ('' when the URL is rejected or the request cannot be opened).
  function GetURL(AURL: string) : string;
published
  // Settings copied in by TEPGGrabber.Execute before the thread is started.
  // Days, Lang and TimeShift are not read by the base class in this unit.
  property Days        : Integer read FDays write FDays;
  property ChannelID   : string read FChannelID write FChannelID;
  property ChannelName : string read FChannelName write FChannelName;
  property SiteID      : string read FSiteID write FSiteID;
  // User-Agent header for GetURL; GetURL replaces an empty value with
  // UserAgentIE11.
  property UserAgent   : string read FUserAgent write FUserAgent;
  property Lang        : string read FLang write FLang;
  property TimeShift   : Integer read FTimeShift write FTimeShift;
  // Destination for the scraped lines; must be assigned before the thread
  // runs, otherwise WriteXMLTV writes nothing.
  property OutputLines : TStrings read FOutput write FOutput;
end;
// Component that holds the channel list and the shared scrape settings.
// LoadConfig fills the channel list from a text file; Execute runs one
// TEPGChannelThread per channel and waits for all of them.
TEPGGrabber = class(TComponent)
private
  FChannelList : TEPGChannelList;
  FDays        : Integer;
  FLang        : string;
  FAgent       : string;
  FTimeShift   : Integer;
  FOutput      : TStrings;
public
  constructor Create(AOwner: TComponent); override;
  destructor  Destroy; override;
  // Creates, starts, waits for and frees one thread per channel.
  procedure Execute; virtual;
  // Reads channel definitions from Filename; True when the channel list
  // is non-empty afterwards.
  function LoadConfig(Filename: string) : Boolean;
  // Downloads AURL through WinInet and returns the body as a string.
  function GetURL(AURL: string) : string;
  // The channel list (created in the constructor, freed in Destroy).
  property Channels : TEPGChannelList read FChannelList write FChannelList;
published
  // Settings handed to every worker thread by Execute.
  property Days        : Integer read FDays write FDays;
  property Lang        : string read FLang write FLang;
  property UserAgent   : string read FAgent write FAgent;
  property TimeShift   : Integer read FTimeShift write FTimeShift;
  // Shared output list; LoadConfig adds the <channel> elements to it and
  // the worker threads append their results.
  property OutputLines : TStrings read FOutput write FOutput;
end;
const
// Default User-Agent string (Internet Explorer 11 on Windows 10); both GetURL
// implementations in this unit fall back to it when UserAgent is empty.
UserAgentIE11 : string = 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko';
implementation
{ TEPGChannel class }
(******************************************************************************)
constructor TEPGChannel.Create(AChannelID, AChannelName, ASiteID: string);
(******************************************************************************)
// Stores the three identifiers unchanged in the private fields that back
// the read-only ChannelID, ChannelName and SiteID properties.
begin
  FSiteID      := ASiteID;
  FChannelName := AChannelName;
  FChannelID   := AChannelID;
end;
{ TEPGChannel List class }
(******************************************************************************)
constructor TEPGChannelList.Create;
(******************************************************************************)
// Nothing to set up beyond the inherited constructor; FList starts out as an
// empty dynamic array.
begin
  inherited;
end;
(******************************************************************************)
destructor TEPGChannelList.Destroy;
(******************************************************************************)
// Frees every channel object still held by the list and then releases the
// array itself. The list acts as owner of its items: TEPGGrabber.LoadConfig
// creates the TEPGChannel instances and hands them straight to Add, and
// nothing else in this unit frees them, so without this loop they leak.
// NOTE(review): callers must not free an item themselves or add the same
// instance twice, otherwise it would be freed twice here.
var
  I : Integer;
begin
  for I := High(FList) downto Low(FList) do
  begin
    // Free is nil-safe, so empty slots are skipped silently.
    FList[I].Free;
    FList[I] := nil;
  end;
  SetLength(FList, 0);
  inherited Destroy;
end;
(******************************************************************************)
procedure TEPGChannelList.Add(AEPGChannel: TEPGChannel);
(******************************************************************************)
// Grows the backing array by one slot and stores AEPGChannel in the new last
// position.
var
  NewIndex : Integer;
begin
  NewIndex := Length(FList);
  SetLength(FList, NewIndex + 1);
  FList[NewIndex] := AEPGChannel;
end;
(******************************************************************************)
function TEPGChannelList.GetEPGChannel(Index: Integer) : TEPGChannel;
(******************************************************************************)
// Read accessor of the Items property: returns the channel stored at the
// zero-based Index. No explicit bounds check is made here; Index is expected
// to lie in 0..Count-1.
begin
Result := FList[Index];
end;
(******************************************************************************)
procedure TEPGChannelList.SetEPGChannel(Index: Integer; EPGChannel: TEPGChannel);
(******************************************************************************)
// Write accessor of the Items property: overwrites the slot at the zero-based
// Index with EPGChannel. The previous occupant of the slot is not freed and no
// explicit bounds check is made.
begin
FList[Index] := EPGChannel;
end;
(******************************************************************************)
function TEPGChannelList.GetCount : Integer;
(******************************************************************************)
// Read accessor of the Count property: number of slots in the backing array.
// High() of an empty dynamic array is -1, so an empty list reports 0.
begin
  Result := High(FList) + 1;
end;
{ TEPGChannel Thread class }
(******************************************************************************)
constructor TEPGChannelThread.Create;
(******************************************************************************)
// Parameterless overload: runs the inherited parameterless TThread
// constructor and allocates the result list that Destroy releases.
begin
  inherited Create;
  FResult := TStringList.Create();
end;
(******************************************************************************)
constructor TEPGChannelThread.Create(CreateSuspended: Boolean);
(******************************************************************************)
// Overload that forwards CreateSuspended to the inherited TThread constructor
// and allocates the result list that Destroy releases.
begin
  inherited Create(CreateSuspended);
  FResult := TStringList.Create();
end;
(******************************************************************************)
destructor TEPGChannelThread.Destroy;
(******************************************************************************)
// Releases the result list created by the constructors, then runs the
// inherited destructor.
// NOTE(review): FResult is released before the inherited destructor runs, so
// the thread should already have finished when the object is freed
// (TEPGGrabber.Execute calls WaitFor first) - confirm for other callers.
begin
  FResult.Free;
  inherited;
end;
(******************************************************************************)
procedure TEPGChannelThread.ScrapeChannel;
(******************************************************************************)
// Scraping hook called first by Execute. The base implementation is
// intentionally empty; the commented-out line below shows the intended usage,
// i.e. an override adds the scraped lines to FResult.
begin
// FResult.Add('scraped_info');
end;
(******************************************************************************)
procedure TEPGChannelThread.WriteXMLTV;
(******************************************************************************)
// Writes the scraped channel and its programmes (the contents of FResult) to
// the output TStrings. The output list has to be assigned through OutputLines
// before Execute is called; when none is assigned nothing is written.
begin
  if not Assigned(FOutput) then
    Exit;
  FOutput.AddStrings(FResult);
end;
(******************************************************************************)
function TEPGChannelThread.GetURL(AURL: string) : string;
(******************************************************************************)
// Downloads AURL through WinInet and returns the response body, decoded as
// UTF-8, as a string.
//
// Returns '' when AURL does not contain 'http://' (case-insensitive), when the
// WinInet session or the URL cannot be opened, or when nothing was received.
// If a read fails part-way, the data received up to that point is returned.
// An empty UserAgent property is replaced by UserAgentIE11 as a side effect.
// NOTE(review): URLs that only contain 'https://' are rejected by the check
// below, exactly as before - confirm whether that is intended.
const
  // Read in modest chunks; the former 1,000,000-byte local array nearly
  // filled a default 1 MB thread stack on its own.
  ChunkSize = 16384;
var
  NetHandle : HINTERNET;
  UrlHandle : HINTERNET;
  Chunk     : array [0..ChunkSize - 1] of Byte;
  BytesRead : DWORD;
  Piece     : UTF8String;
  RawData   : UTF8String;
begin
  Result := '';
  if Pos('http://', Lowercase(AURL)) = 0 then Exit;
  if (UserAgent = '') then UserAgent := UserAgentIE11;

  NetHandle := InternetOpen(PChar(UserAgent), INTERNET_OPEN_TYPE_PRECONFIG, nil, nil, 0);
  if not Assigned(NetHandle) then Exit;
  try
    UrlHandle := InternetOpenUrl(NetHandle, PChar(AURL), nil, 0, INTERNET_FLAG_RELOAD, 0);
    if not Assigned(UrlHandle) then Exit;
    try
      RawData := '';
      repeat
        BytesRead := 0;
        // A failed read must end the loop; otherwise BytesRead would keep a
        // stale value and the loop could spin forever or append garbage.
        if not InternetReadFile(UrlHandle, @Chunk[0], SizeOf(Chunk), BytesRead) then
          Break;
        if BytesRead > 0 then
        begin
          // Collect the raw bytes first (UTF8String + UTF8String does not
          // transcode) so that a multi-byte character split across two reads
          // is not damaged by decoding each chunk separately.
          SetString(Piece, PAnsiChar(@Chunk[0]), BytesRead);
          RawData := RawData + Piece;
        end;
      until BytesRead = 0;
      // Decode the complete payload from UTF-8 in one step.
      Result := string(RawData);
    finally
      InternetCloseHandle(UrlHandle);
    end;
  finally
    InternetCloseHandle(NetHandle);
  end;
end;
(******************************************************************************)
procedure TEPGChannelThread.Execute;
(******************************************************************************)
// Thread body: scrape the channel first, then write the channel data to the
// output. The write goes through Synchronize rather than being called
// directly from this thread.
begin
  ScrapeChannel;            // step 1: scrape the channel
  Synchronize(WriteXMLTV);  // step 2: write the channel data to the output
end;
{ TEPGGrabber Class }
(******************************************************************************)
constructor TEPGGrabber.Create(AOwner: TComponent);
(******************************************************************************)
// Creates the grabber and its (initially empty) channel list.
// The inherited TComponent constructor has to run with AOwner: it was
// commented out before, so the owner was ignored and the component part of
// the object was never initialised.
begin
  inherited Create(AOwner);
  FChannelList := TEPGChannelList.Create;
end;
(******************************************************************************)
destructor TEPGGrabber.Destroy;
(******************************************************************************)
// Releases the channel list created in the constructor, then runs the
// inherited TComponent destructor.
begin
  FChannelList.Free;
  inherited;
end;
(******************************************************************************)
procedure TEPGGrabber.Execute;
(******************************************************************************)
// Scrapes all channels, one TEPGChannelThread per entry in Channels, in three
// passes: create every thread suspended and configure it, start them all,
// then wait for each one and free it. Returns after the last thread has been
// waited for.
var
  Idx     : Integer;
  Channel : TEPGChannel;
  Worker  : TEPGChannelThread;
  Workers : array of TEPGChannelThread;
begin
  // Pass 1: create all threads (channels) first.
  SetLength(Workers, Channels.Count);
  for Idx := 0 to Channels.Count - 1 do
  begin
    Worker := TEPGChannelThread.Create(True);
    Workers[Idx] := Worker;

    // Shared settings of the grabber.
    Worker.Days := Days;

    // Per-channel identifiers.
    Channel := Channels.Items[Idx];
    Worker.ChannelID   := Channel.ChannelID;
    Worker.ChannelName := Channel.ChannelName;
    Worker.SiteID      := Channel.SiteID;

    Worker.UserAgent   := UserAgent;
    Worker.Lang        := Lang;
    Worker.TimeShift   := TimeShift;
    Worker.OutputLines := OutputLines;
  end;

  // Pass 2: activate all threads, i.e. scrape all channels.
  for Idx := 0 to Channels.Count - 1 do
    Workers[Idx].Start;

  // Pass 3: wait for every thread (channel) and release it.
  for Idx := 0 to Channels.Count - 1 do
  begin
    Worker := Workers[Idx];
    Worker.WaitFor;
    Worker.Free;
  end;
end;
(******************************************************************************)
function TEPGGrabber.LoadConfig(Filename: string) : Boolean;
(******************************************************************************)
// Loads channel definitions from the text file Filename.
//
// Expected line layout (keys are case-sensitive and must appear in this
// order, each at the start of its field):
//   XMLTVID=<id>|SITEID=<site id>|DISPLAYNAME=<name>
// Lines whose first character is '*' are skipped. Lines that do not follow
// the layout are skipped as well; previously a line that merely contained
// the three keys somewhere was cut at fixed offsets and produced a channel
// with garbage values.
//
// For every accepted line a TEPGChannel is added to the channel list (when
// one is assigned) and a <channel> element is appended to OutputLines (when
// assigned).
//
// Returns True when the file exists and the channel list holds at least one
// channel afterwards; False otherwise. Errors raised while reading the file
// are not caught here.
const
  CXMLTVID  : string = 'XMLTVID=';
  CSITEID   : string = 'SITEID=';
  CDISPNAME : string = 'DISPLAYNAME=';
  CSEP      = '|';
var
  C        : TStringList;
  I        : Integer;
  Temp     : string;
  XMLTVID  : string;
  SITEID   : string;
  DISPNAME : string;

  // Adds the channel to the XML output.
  procedure AddXMLChannel;
  const
    XML : string = ' <channel id="%s">' + #13#10 +
                   ' <display-name lang="%s">%s</display-name>' + #13#10 +
                   ' </channel>';
  begin
    if Assigned(OutputLines) then
      OutputLines.Add(Format(XML, [XMLTVID, Lang, DISPNAME]));
  end;

  // Cuts a leading "AKey<value>|" field off ARest and returns <value> in
  // AValue. Returns False, leaving ARest untouched, when ARest does not start
  // with AKey or contains no separator.
  function TakeField(const AKey: string; var ARest: string; out AValue: string): Boolean;
  var
    SepPos : Integer;
  begin
    Result := False;
    AValue := '';
    if Pos(AKey, ARest) <> 1 then Exit;
    SepPos := Pos(CSEP, ARest);
    if SepPos = 0 then Exit;
    AValue := Copy(ARest, Length(AKey) + 1, SepPos - Length(AKey) - 1);
    Delete(ARest, 1, SepPos);
    Result := True;
  end;

begin
  Result := False;
  if not FileExists(Filename) then Exit;

  C := TStringList.Create;
  try
    C.LoadFromFile(Filename);
    for I := 0 to C.Count - 1 do
    begin
      Temp := C[I];
      // Comment line.
      if Pos('*', Temp) = 1 then Continue;
      // Peel the fields off one by one; skip the line when one is malformed.
      if not TakeField(CXMLTVID, Temp, XMLTVID) then Continue;
      if not TakeField(CSITEID, Temp, SITEID) then Continue;
      if Pos(CDISPNAME, Temp) <> 1 then Continue;
      // The display name is everything after its key, up to the end of line.
      DISPNAME := Copy(Temp, Length(CDISPNAME) + 1, MaxInt);

      if Assigned(FChannelList) then
        FChannelList.Add(TEPGChannel.Create(XMLTVID, DISPNAME, SITEID));
      AddXMLChannel;
    end;
    // Guarded like the Add call above, so an unassigned list yields False
    // instead of an access violation.
    Result := Assigned(FChannelList) and (FChannelList.Count > 0);
  finally
    C.Free;
  end;
end;
(******************************************************************************)
function TEPGGrabber.GetURL(AURL: string) : string;
(******************************************************************************)
// Downloads AURL through WinInet and returns the response body, decoded as
// UTF-8, as a string.
//
// Returns '' when AURL does not contain 'http://' (case-insensitive), when the
// WinInet session or the URL cannot be opened, or when nothing was received.
// If a read fails part-way, the data received up to that point is returned.
// An empty UserAgent property is replaced by UserAgentIE11 as a side effect.
// NOTE(review): URLs that only contain 'https://' are rejected by the check
// below, exactly as before - confirm whether that is intended.
const
  // Read in modest chunks; the former 1,000,000-byte local array took up
  // almost 1 MB of stack for a single call.
  ChunkSize = 16384;
var
  NetHandle : HINTERNET;
  UrlHandle : HINTERNET;
  Chunk     : array [0..ChunkSize - 1] of Byte;
  BytesRead : DWORD;
  Piece     : UTF8String;
  RawData   : UTF8String;
begin
  Result := '';
  if Pos('http://', Lowercase(AURL)) = 0 then Exit;
  if (UserAgent = '') then UserAgent := UserAgentIE11;

  NetHandle := InternetOpen(PChar(UserAgent), INTERNET_OPEN_TYPE_PRECONFIG, nil, nil, 0);
  if not Assigned(NetHandle) then Exit;
  try
    UrlHandle := InternetOpenUrl(NetHandle, PChar(AURL), nil, 0, INTERNET_FLAG_RELOAD, 0);
    if not Assigned(UrlHandle) then Exit;
    try
      RawData := '';
      repeat
        BytesRead := 0;
        // A failed read must end the loop; otherwise BytesRead would keep a
        // stale value and the loop could spin forever or append garbage.
        if not InternetReadFile(UrlHandle, @Chunk[0], SizeOf(Chunk), BytesRead) then
          Break;
        if BytesRead > 0 then
        begin
          // Collect the raw bytes first (UTF8String + UTF8String does not
          // transcode) so that a multi-byte character split across two reads
          // is not damaged by decoding each chunk separately.
          SetString(Piece, PAnsiChar(@Chunk[0]), BytesRead);
          RawData := RawData + Piece;
        end;
      until BytesRead = 0;
      // Decode the complete payload from UTF-8 in one step.
      Result := string(RawData);
    finally
      InternetCloseHandle(UrlHandle);
    end;
  finally
    InternetCloseHandle(NetHandle);
  end;
end;
end.
Dat is de basis die ik heb gemaakt. Ik maak dus voor elke scraper een afgeleide klasse van TEPGGrabber en TEPGChannelThread. Ik doe het nu zoals je me had aangegeven: ik scrape elk kanaal in een aparte thread en schrijf het resultaat via Synchronize weg naar mijn memo. Als ik debug, zie ik ook netjes dat hij alle threads start, dus ik neem aan dat die ook gelijktijdig (parallel) lopen? Of wachten ze in volgorde tot de vorige is afgelopen? (Als ik een logging inbouw in de thread, waarbij elke thread naar een eigen log schrijft, zie ik ook dat ze naast elkaar gestart worden.)
Bookmarks