Difference between revisions of "Download from SourceForge"
From Free Pascal wiki
Jump to navigationJump to searchm |
(Corrected previous edit: Copied the code over from the forum post. Why confuse people and keep info spread out in multiple places?) |
||
Line 1: | Line 1: | ||
You can use the [[Synapse]] networking library to download files from SourceForge. Because SourceForge store files on multiple mirrors you have to deal with redirection. | You can use the [[Synapse]] networking library to download files from SourceForge. Because SourceForge store files on multiple mirrors you have to deal with redirection. | ||
− | ''' | + | <syntaxhighlight> |
− | http:// | + | function DownloadHTTPStream(URL: string; Buffer: TStream): boolean; |
+ | // Download file; retry if necessary. | ||
+ | const | ||
+ | MaxRetries = 3; | ||
+ | var | ||
+ | RetryAttempt: integer; | ||
+ | HTTPGetResult: boolean; | ||
+ | begin | ||
+ | Result:=false; | ||
+ | RetryAttempt := 1; | ||
+ | HTTPGetResult := False; | ||
+ | while ((HTTPGetResult = False) and (RetryAttempt < MaxRetries)) do | ||
+ | begin | ||
+ | HTTPGetResult := HttpGetBinary(URL, Buffer); | ||
+ | //Application.ProcessMessages; | ||
+ | Sleep(100 * RetryAttempt); | ||
+ | RetryAttempt := RetryAttempt + 1; | ||
+ | end; | ||
+ | if HTTPGetResult = False then | ||
+ | raise Exception.Create('Cannot load document from remote server'); | ||
+ | Buffer.Position := 0; | ||
+ | if Buffer.Size = 0 then | ||
+ | raise Exception.Create('Downloaded document is empty.'); | ||
+ | Result := True; | ||
+ | end; | ||
+ | |||
+ | function SFDirectLinkURL(URL: string; Document: TMemoryStream): string; | ||
+ | { | ||
+ | Transform this part of the body: | ||
+ | <noscript> | ||
+ | <meta http-equiv="refresh" content="5; url=http://downloads.sourceforge.net/project/base64decoder/base64decoder/version%202.0/b64util.zip?r=&ts=1329648745&use_mirror=kent"> | ||
+ | </noscript> | ||
+ | into a valid URL: | ||
+ | http://downloads.sourceforge.net/project/base64decoder/base64decoder/version%202.0/b64util.zip?r=&ts=1329648745&use_mirror=kent | ||
+ | } | ||
+ | const | ||
+ | Refresh='<meta http-equiv="refresh"'; | ||
+ | URLMarker='url='; | ||
+ | var | ||
+ | Counter: integer; | ||
+ | HTMLBody: TStringList; | ||
+ | RefreshStart: integer; | ||
+ | URLStart: integer; | ||
+ | begin | ||
+ | HTMLBody:=TStringList.Create; | ||
+ | try | ||
+ | HTMLBody.LoadFromStream(Document); | ||
+ | for Counter:=0 to HTMLBody.Count-1 do | ||
+ | begin | ||
+ | // This line should be between noscript tags and give the direct download locations: | ||
+ | RefreshStart:=Ansipos(Refresh, HTMLBody[Counter]); | ||
+ | if RefreshStart>0 then | ||
+ | begin | ||
+ | URLStart:=AnsiPos(URLMarker, HTMLBody[Counter])+Length(URLMarker); | ||
+ | if URLStart>RefreshStart then | ||
+ | begin | ||
+ | // Look for closing " | ||
+ | URL:=Copy(HTMLBody[Counter], | ||
+ | URLStart, | ||
+ | PosEx('"',HTMLBody[Counter],URLStart+1)-URLStart); | ||
+ | infoln('debug: new url after sf noscript:'); | ||
+ | infoln(URL); | ||
+ | break; | ||
+ | end; | ||
+ | end; | ||
+ | end; | ||
+ | finally | ||
+ | HTMLBody.Free; | ||
+ | end; | ||
+ | result:=URL; | ||
+ | end; | ||
− | |||
function SourceForgeURL(URL: string): string; | function SourceForgeURL(URL: string): string; | ||
// Detects sourceforge download and tries to deal with | // Detects sourceforge download and tries to deal with | ||
Line 74: | Line 143: | ||
begin | begin | ||
HTTPSender.HTTPMethod('GET', URL); | HTTPSender.HTTPMethod('GET', URL); | ||
+ | infoln('debug: headers:'); | ||
+ | infoln(HTTPSender.Headers.Text); | ||
case HTTPSender.Resultcode of | case HTTPSender.Resultcode of | ||
301, 302, 307: | 301, 302, 307: | ||
Line 98: | Line 169: | ||
100..200: | 100..200: | ||
begin | begin | ||
− | // | + | //Assume a sourceforge timer/direct link page |
− | + | URL:=SFDirectLinkURL(URL, HTTPSender.Document); //Find out | |
− | |||
− | |||
− | |||
− | |||
FoundCorrectURL:=true; //We're done by now | FoundCorrectURL:=true; //We're done by now | ||
end; | end; | ||
Line 113: | Line 180: | ||
end;//case | end;//case | ||
end;//while | end;//while | ||
+ | infoln('debug: resulting url after sf redir: *' + URL + '*'); | ||
finally | finally | ||
HTTPSender.Free; | HTTPSender.Free; | ||
Line 118: | Line 186: | ||
end; | end; | ||
result:=URL; | result:=URL; | ||
+ | end; | ||
+ | |||
+ | function DownloadHTTP(URL, TargetFile: string): boolean; | ||
+ | // Download file; retry if necessary. | ||
+ | // Deals with SourceForge download links | ||
+ | var | ||
+ | Buffer: TMemoryStream; | ||
+ | begin | ||
+ | result:=false; | ||
+ | URL:=SourceForgeURL(URL); //Deal with sourceforge URLs | ||
+ | try | ||
+ | Buffer := TMemoryStream.Create; | ||
+ | DownloadHTTPStream(URL, Buffer); | ||
+ | Buffer.SaveToFile(TargetFile); | ||
+ | result:=true; | ||
+ | finally | ||
+ | FreeAndNil(Buffer); | ||
+ | end; | ||
end; | end; | ||
</syntaxhighlight> | </syntaxhighlight> |
Revision as of 12:13, 23 August 2014
You can use the Synapse networking library to download files from SourceForge. Because SourceForge store files on multiple mirrors you have to deal with redirection.
function DownloadHTTPStream(URL: string; Buffer: TStream): boolean;
// Download file; retry if necessary.
const
MaxRetries = 3;
var
RetryAttempt: integer;
HTTPGetResult: boolean;
begin
Result:=false;
RetryAttempt := 1;
HTTPGetResult := False;
while ((HTTPGetResult = False) and (RetryAttempt < MaxRetries)) do
begin
HTTPGetResult := HttpGetBinary(URL, Buffer);
//Application.ProcessMessages;
Sleep(100 * RetryAttempt);
RetryAttempt := RetryAttempt + 1;
end;
if HTTPGetResult = False then
raise Exception.Create('Cannot load document from remote server');
Buffer.Position := 0;
if Buffer.Size = 0 then
raise Exception.Create('Downloaded document is empty.');
Result := True;
end;
function SFDirectLinkURL(URL: string; Document: TMemoryStream): string;
{
Transform this part of the body:
<noscript>
<meta http-equiv="refresh" content="5; url=http://downloads.sourceforge.net/project/base64decoder/base64decoder/version%202.0/b64util.zip?r=&ts=1329648745&use_mirror=kent">
</noscript>
into a valid URL:
http://downloads.sourceforge.net/project/base64decoder/base64decoder/version%202.0/b64util.zip?r=&ts=1329648745&use_mirror=kent
}
const
Refresh='<meta http-equiv="refresh"';
URLMarker='url=';
var
Counter: integer;
HTMLBody: TStringList;
RefreshStart: integer;
URLStart: integer;
begin
HTMLBody:=TStringList.Create;
try
HTMLBody.LoadFromStream(Document);
for Counter:=0 to HTMLBody.Count-1 do
begin
// This line should be between noscript tags and give the direct download locations:
RefreshStart:=Ansipos(Refresh, HTMLBody[Counter]);
if RefreshStart>0 then
begin
URLStart:=AnsiPos(URLMarker, HTMLBody[Counter])+Length(URLMarker);
if URLStart>RefreshStart then
begin
// Look for closing "
URL:=Copy(HTMLBody[Counter],
URLStart,
PosEx('"',HTMLBody[Counter],URLStart+1)-URLStart);
infoln('debug: new url after sf noscript:');
infoln(URL);
break;
end;
end;
end;
finally
HTMLBody.Free;
end;
result:=URL;
end;
function SourceForgeURL(URL: string): string;
// Detects sourceforge download and tries to deal with
// redirection, and extracting direct download link.
// Thanks to
// Ocye: http://lazarus.freepascal.org/index.php/topic,13425.msg70575.html#msg70575
const
SFProjectPart = '//sourceforge.net/projects/';
SFFilesPart = '/files/';
SFDownloadPart ='/download';
var
HTTPSender: THTTPSend;
i, j: integer;
FoundCorrectURL: boolean;
SFDirectory: string; //Sourceforge directory
SFDirectoryBegin: integer;
SFFileBegin: integer;
SFFilename: string; //Sourceforge name of file
SFProject: string;
SFProjectBegin: integer;
begin
// Detect SourceForge download; e.g. from URL
// 1 2 3 4 5 6 7 8 9
// 1234557890123456789012345578901234567890123455789012345678901234557890123456789012345578901234567890
// http://sourceforge.net/projects/base64decoder/files/base64decoder/version%202.0/b64util.zip/download
// ^^^project^^^ ^^^directory............^^^ ^^^file^^^
FoundCorrectURL:=true; //Assume not a SF download
i:=Pos(SFProjectPart, URL);
if i>0 then
begin
// Possibly found project; now extract project, directory and filename parts.
SFProjectBegin:=i+Length(SFProjectPart);
j := PosEx(SFFilesPart, URL, SFProjectBegin);
if (j>0) then
begin
SFProject:=Copy(URL, SFProjectBegin, j-SFProjectBegin);
SFDirectoryBegin:=PosEx(SFFilesPart, URL, SFProjectBegin)+Length(SFFilesPart);
if SFDirectoryBegin>0 then
begin
// Find file
// URL might have trailing arguments... so: search for first
// /download coming up from the right, but it should be after
// /files/
i:=RPos(SFDownloadPart, URL);
// Now look for previous / so we can make out the file
// This might perhaps be the trailing / in /files/
SFFileBegin:=RPosEx('/',URL,i-1)+1;
if SFFileBegin>0 then
begin
SFFilename:=Copy(URL,SFFileBegin, i-SFFileBegin);
//Include trailing /
SFDirectory:=Copy(URL, SFDirectoryBegin, SFFileBegin-SFDirectoryBegin);
FoundCorrectURL:=false;
end;
end;
end;
end;
if not FoundCorrectURL then
begin
try
// Rewrite URL if needed for Sourceforge download redirection
// Detect direct link in HTML body and get URL from that
HTTPSender := THTTPSend.Create;
//Who knows, this might help:
HTTPSender.UserAgent:='curl/7.21.0 (i686-pc-linux-gnu) libcurl/7.21.0 OpenSSL/0.9.8o zlib/1.2.3.4 libidn/1.18';
while not FoundCorrectURL do
begin
HTTPSender.HTTPMethod('GET', URL);
infoln('debug: headers:');
infoln(HTTPSender.Headers.Text);
case HTTPSender.Resultcode of
301, 302, 307:
begin
for i := 0 to HTTPSender.Headers.Count - 1 do
if (Pos('Location: ', HTTPSender.Headers.Strings[i]) > 0) or
(Pos('location: ', HTTPSender.Headers.Strings[i]) > 0) then
begin
j := Pos('use_mirror=', HTTPSender.Headers.Strings[i]);
if j > 0 then
URL :=
'http://' + RightStr(HTTPSender.Headers.Strings[i],
length(HTTPSender.Headers.Strings[i]) - j - 10) +
'.dl.sourceforge.net/project/' +
SFProject + '/' + SFDirectory + SFFilename
else
URL:=StringReplace(
HTTPSender.Headers.Strings[i], 'Location: ', '', []);
HTTPSender.Clear;//httpsend
FoundCorrectURL:=true;
break; //out of rewriting loop
end;
end;
100..200:
begin
//Assume a sourceforge timer/direct link page
URL:=SFDirectLinkURL(URL, HTTPSender.Document); //Find out
FoundCorrectURL:=true; //We're done by now
end;
500: raise Exception.Create('No internet connection available');
//Internal Server Error ('+aURL+')');
else
raise Exception.Create('Download failed with error code ' +
IntToStr(HTTPSender.ResultCode) + ' (' + HTTPSender.ResultString + ')');
end;//case
end;//while
infoln('debug: resulting url after sf redir: *' + URL + '*');
finally
HTTPSender.Free;
end;
end;
result:=URL;
end;
function DownloadHTTP(URL, TargetFile: string): boolean;
// Download file; retry if necessary.
// Deals with SourceForge download links
var
Buffer: TMemoryStream;
begin
result:=false;
URL:=SourceForgeURL(URL); //Deal with sourceforge URLs
try
Buffer := TMemoryStream.Create;
DownloadHTTPStream(URL, Buffer);
Buffer.SaveToFile(TargetFile);
result:=true;
finally
FreeAndNil(Buffer);
end;
end;
Then you can download file using standard method.
...
uses httpsend
...
function DownloadHTTP(URL, TargetFile: string): boolean;
// Download file; retry if necessary.
// Deals with SourceForge download links
// Could use Synapse HttpGetBinary, but that doesn't deal
// with result codes (i.e. it happily downloads a 404 error document)
const
MaxRetries=3;
var
HTTPGetResult: boolean;
HTTPSender: THTTPSend;
RetryAttempt: integer;
begin
result:=false;
RetryAttempt:=1;
//Optional: mangling of Sourceforge file download URLs; see below.
URL:=SourceForgeURL(URL); //Deal with sourceforge URLs
HTTPSender:=THTTPSend.Create;
try
try
// Try to get the file
HTTPGetResult:=HTTPSender.HTTPMethod('GET', URL);
while (HTTPGetResult=false) and (RetryAttempt<MaxRetries) do
begin
sleep(500*RetryAttempt);
HTTPGetResult:=HTTPSender.HTTPMethod('GET', URL);
RetryAttempt:=RetryAttempt+1;
end;
// If we have an answer from the server, check if the file
// was sent to us.
case HTTPSender.Resultcode of
100..299:
begin
with TFileStream.Create(TargetFile,fmCreate or fmOpenWrite) do
try
Seek(0, soFromBeginning);
CopyFrom(HTTPSender.Document, 0);
finally
Free;
end;
result:=true;
end; //informational, success
300..399: result:=false; //redirection. Not implemented, but could be.
400..499: result:=false; //client error; 404 not found etc
500..599: result:=false; //internal server error
else result:=false; //unknown code
end;
except
// We don't care for the reason for this error; the download failed.
result:=false;
end;
finally
HTTPSender.Free;
end;
end;