Difference between revisions of "Download from SourceForge"

From Free Pascal wiki
Jump to navigationJump to search
m
(Corrected previous edit: Copied the code over from the forum post. Why confuse people and keep info spread out in multiple places?)
Line 1: Line 1:
 
You can use the [[Synapse]] networking library to download files from SourceForge. Because SourceForge store files on multiple mirrors you have to deal with redirection.
 
You can use the [[Synapse]] networking library to download files from SourceForge. Because SourceForge store files on multiple mirrors you have to deal with redirection.
  
'''Download the working code here:'''
+
<syntaxhighlight>
http://forum.lazarus.freepascal.org/index.php/topic,13425.msg87038.html#msg87038
+
function DownloadHTTPStream(URL: string; Buffer: TStream): boolean;
 +
  // Download file; retry if necessary.
 +
const
 +
  MaxRetries = 3;
 +
var
 +
  RetryAttempt: integer;
 +
  HTTPGetResult: boolean;
 +
begin
 +
  Result:=false;
 +
  RetryAttempt := 1;
 +
  HTTPGetResult := False;
 +
  while ((HTTPGetResult = False) and (RetryAttempt < MaxRetries)) do
 +
  begin
 +
    HTTPGetResult := HttpGetBinary(URL, Buffer);
 +
    //Application.ProcessMessages;
 +
    Sleep(100 * RetryAttempt);
 +
    RetryAttempt := RetryAttempt + 1;
 +
  end;
 +
  if HTTPGetResult = False then
 +
    raise Exception.Create('Cannot load document from remote server');
 +
  Buffer.Position := 0;
 +
  if Buffer.Size = 0 then
 +
    raise Exception.Create('Downloaded document is empty.');
 +
  Result := True;
 +
end;
 +
 
 +
function SFDirectLinkURL(URL: string; Document: TMemoryStream): string;
 +
{
 +
Transform this part of the body:
 +
<noscript>
 +
<meta http-equiv="refresh" content="5; url=http://downloads.sourceforge.net/project/base64decoder/base64decoder/version%202.0/b64util.zip?r=&amp;ts=1329648745&amp;use_mirror=kent">
 +
</noscript>
 +
into a valid URL:
 +
http://downloads.sourceforge.net/project/base64decoder/base64decoder/version%202.0/b64util.zip?r=&amp;ts=1329648745&amp;use_mirror=kent
 +
}
 +
const
 +
  Refresh='<meta http-equiv="refresh"';
 +
  URLMarker='url=';
 +
var
 +
  Counter: integer;
 +
  HTMLBody: TStringList;
 +
  RefreshStart: integer;
 +
  URLStart: integer;
 +
begin
 +
  HTMLBody:=TStringList.Create;
 +
  try
 +
    HTMLBody.LoadFromStream(Document);
 +
    for Counter:=0 to HTMLBody.Count-1 do
 +
    begin
 +
      // This line should be between noscript tags and give the direct download locations:
 +
      RefreshStart:=Ansipos(Refresh, HTMLBody[Counter]);
 +
      if RefreshStart>0 then
 +
      begin
 +
        URLStart:=AnsiPos(URLMarker, HTMLBody[Counter])+Length(URLMarker);
 +
        if URLStart>RefreshStart then
 +
        begin
 +
          // Look for closing "
 +
          URL:=Copy(HTMLBody[Counter],
 +
            URLStart,
 +
            PosEx('"',HTMLBody[Counter],URLStart+1)-URLStart);
 +
          infoln('debug: new url after sf noscript:');
 +
          infoln(URL);
 +
          break;
 +
        end;
 +
      end;
 +
    end;
 +
  finally
 +
    HTMLBody.Free;
 +
  end;
 +
  result:=URL;
 +
end;
  
<syntaxhighlight>
 
 
function SourceForgeURL(URL: string): string;
 
function SourceForgeURL(URL: string): string;
 
// Detects sourceforge download and tries to deal with
 
// Detects sourceforge download and tries to deal with
Line 74: Line 143:
 
       begin
 
       begin
 
         HTTPSender.HTTPMethod('GET', URL);
 
         HTTPSender.HTTPMethod('GET', URL);
 +
        infoln('debug: headers:');
 +
        infoln(HTTPSender.Headers.Text);
 
         case HTTPSender.Resultcode of
 
         case HTTPSender.Resultcode of
 
           301, 302, 307:
 
           301, 302, 307:
Line 98: Line 169:
 
           100..200:
 
           100..200:
 
             begin
 
             begin
               //Could be a sourceforge timer/direct link page, but...
+
               //Assume a sourceforge timer/direct link page
               if AnsiPos('Content-Type: text/html', HTTPSender.Headers.Text)>0 then
+
               URL:=SFDirectLinkURL(URL, HTTPSender.Document); //Find out
              begin
 
                // find out... it's at least not a binary
 
                URL:=SFDirectLinkURL(URL, HTTPSender.Document);
 
              end;
 
 
               FoundCorrectURL:=true; //We're done by now
 
               FoundCorrectURL:=true; //We're done by now
 
             end;
 
             end;
Line 113: Line 180:
 
         end;//case
 
         end;//case
 
       end;//while
 
       end;//while
 +
      infoln('debug: resulting url after sf redir: *' + URL + '*');
 
     finally
 
     finally
 
       HTTPSender.Free;
 
       HTTPSender.Free;
Line 118: Line 186:
 
   end;
 
   end;
 
   result:=URL;
 
   result:=URL;
 +
end;
 +
 +
function DownloadHTTP(URL, TargetFile: string): boolean;
 +
// Download file; retry if necessary.
 +
// Deals with SourceForge download links
 +
var
 +
  Buffer: TMemoryStream;
 +
begin
 +
  result:=false;
 +
  URL:=SourceForgeURL(URL); //Deal with sourceforge URLs
 +
  try
 +
    Buffer := TMemoryStream.Create;
 +
    DownloadHTTPStream(URL, Buffer);
 +
    Buffer.SaveToFile(TargetFile);
 +
    result:=true;
 +
  finally
 +
    FreeAndNil(Buffer);
 +
  end;
 
end;
 
end;
 
</syntaxhighlight>
 
</syntaxhighlight>

Revision as of 12:13, 23 August 2014

You can use the Synapse networking library to download files from SourceForge. Because SourceForge store files on multiple mirrors you have to deal with redirection.

function DownloadHTTPStream(URL: string; Buffer: TStream): boolean;
  // Download file; retry if necessary.
const
  MaxRetries = 3;
var
  RetryAttempt: integer;
  HTTPGetResult: boolean;
begin
  Result:=false;
  RetryAttempt := 1;
  HTTPGetResult := False;
  while ((HTTPGetResult = False) and (RetryAttempt < MaxRetries)) do
  begin
    HTTPGetResult := HttpGetBinary(URL, Buffer);
    //Application.ProcessMessages;
    Sleep(100 * RetryAttempt);
    RetryAttempt := RetryAttempt + 1;
  end;
  if HTTPGetResult = False then
    raise Exception.Create('Cannot load document from remote server');
  Buffer.Position := 0;
  if Buffer.Size = 0 then
    raise Exception.Create('Downloaded document is empty.');
  Result := True;
end;

function SFDirectLinkURL(URL: string; Document: TMemoryStream): string;
{
Transform this part of the body:
<noscript>
<meta http-equiv="refresh" content="5; url=http://downloads.sourceforge.net/project/base64decoder/base64decoder/version%202.0/b64util.zip?r=&amp;ts=1329648745&amp;use_mirror=kent">
</noscript>
into a valid URL:
http://downloads.sourceforge.net/project/base64decoder/base64decoder/version%202.0/b64util.zip?r=&amp;ts=1329648745&amp;use_mirror=kent
}
const
  Refresh='<meta http-equiv="refresh"';
  URLMarker='url=';
var
  Counter: integer;
  HTMLBody: TStringList;
  RefreshStart: integer;
  URLStart: integer;
begin
  HTMLBody:=TStringList.Create;
  try
    HTMLBody.LoadFromStream(Document);
    for Counter:=0 to HTMLBody.Count-1 do
    begin
      // This line should be between noscript tags and give the direct download locations:
      RefreshStart:=Ansipos(Refresh, HTMLBody[Counter]);
      if RefreshStart>0 then
      begin
        URLStart:=AnsiPos(URLMarker, HTMLBody[Counter])+Length(URLMarker);
        if URLStart>RefreshStart then
        begin
          // Look for closing "
          URL:=Copy(HTMLBody[Counter],
            URLStart,
            PosEx('"',HTMLBody[Counter],URLStart+1)-URLStart);
          infoln('debug: new url after sf noscript:');
          infoln(URL);
          break;
        end;
      end;
    end;
  finally
    HTMLBody.Free;
  end;
  result:=URL;
end;

function SourceForgeURL(URL: string): string;
// Detects sourceforge download and tries to deal with
// redirection, and extracting direct download link.
// Thanks to
// Ocye: http://lazarus.freepascal.org/index.php/topic,13425.msg70575.html#msg70575
const
  SFProjectPart = '//sourceforge.net/projects/';
  SFFilesPart = '/files/';
  SFDownloadPart ='/download';
var
  HTTPSender: THTTPSend;
  i, j: integer;
  FoundCorrectURL: boolean;
  SFDirectory: string; //Sourceforge directory
  SFDirectoryBegin: integer;
  SFFileBegin: integer;
  SFFilename: string; //Sourceforge name of file
  SFProject: string;
  SFProjectBegin: integer;
begin
  // Detect SourceForge download; e.g. from URL
  //          1         2         3         4         5         6         7         8         9
  // 1234557890123456789012345578901234567890123455789012345678901234557890123456789012345578901234567890
  // http://sourceforge.net/projects/base64decoder/files/base64decoder/version%202.0/b64util.zip/download
  //                                 ^^^project^^^       ^^^directory............^^^ ^^^file^^^
  FoundCorrectURL:=true; //Assume not a SF download
  i:=Pos(SFProjectPart, URL);
  if i>0 then
  begin
    // Possibly found project; now extract project, directory and filename parts.
    SFProjectBegin:=i+Length(SFProjectPart);
    j := PosEx(SFFilesPart, URL, SFProjectBegin);
    if (j>0) then
    begin
      SFProject:=Copy(URL, SFProjectBegin, j-SFProjectBegin);
      SFDirectoryBegin:=PosEx(SFFilesPart, URL, SFProjectBegin)+Length(SFFilesPart);
      if SFDirectoryBegin>0 then
      begin
        // Find file
        // URL might have trailing arguments... so: search for first
        // /download coming up from the right, but it should be after
        // /files/
        i:=RPos(SFDownloadPart, URL);
        // Now look for previous / so we can make out the file
        // This might perhaps be the trailing / in /files/
        SFFileBegin:=RPosEx('/',URL,i-1)+1;

        if SFFileBegin>0 then
        begin
          SFFilename:=Copy(URL,SFFileBegin, i-SFFileBegin);
          //Include trailing /
          SFDirectory:=Copy(URL, SFDirectoryBegin, SFFileBegin-SFDirectoryBegin);
          FoundCorrectURL:=false;
        end;
      end;
    end;
  end;

  if not FoundCorrectURL then
  begin
    try
      // Rewrite URL if needed for Sourceforge download redirection
      // Detect direct link in HTML body and get URL from that
      HTTPSender := THTTPSend.Create;
      //Who knows, this might help:
      HTTPSender.UserAgent:='curl/7.21.0 (i686-pc-linux-gnu) libcurl/7.21.0 OpenSSL/0.9.8o zlib/1.2.3.4 libidn/1.18';
      while not FoundCorrectURL do
      begin
        HTTPSender.HTTPMethod('GET', URL);
        infoln('debug: headers:');
        infoln(HTTPSender.Headers.Text);
        case HTTPSender.Resultcode of
          301, 302, 307:
            begin
              for i := 0 to HTTPSender.Headers.Count - 1 do
                if (Pos('Location: ', HTTPSender.Headers.Strings[i]) > 0) or
                  (Pos('location: ', HTTPSender.Headers.Strings[i]) > 0) then
                begin
                  j := Pos('use_mirror=', HTTPSender.Headers.Strings[i]);
                  if j > 0 then
                    URL :=
                      'http://' + RightStr(HTTPSender.Headers.Strings[i],
                      length(HTTPSender.Headers.Strings[i]) - j - 10) +
                      '.dl.sourceforge.net/project/' +
                      SFProject + '/' + SFDirectory + SFFilename
                  else
                    URL:=StringReplace(
                      HTTPSender.Headers.Strings[i], 'Location: ', '', []);
                  HTTPSender.Clear;//httpsend
                  FoundCorrectURL:=true;
                  break; //out of rewriting loop
              end;
            end;
          100..200:
            begin
              //Assume a sourceforge timer/direct link page
              URL:=SFDirectLinkURL(URL, HTTPSender.Document); //Find out
              FoundCorrectURL:=true; //We're done by now
            end;
          500: raise Exception.Create('No internet connection available');
            //Internal Server Error ('+aURL+')');
          else
            raise Exception.Create('Download failed with error code ' +
              IntToStr(HTTPSender.ResultCode) + ' (' + HTTPSender.ResultString + ')');
        end;//case
      end;//while
      infoln('debug: resulting url after sf redir: *' + URL + '*');
    finally
      HTTPSender.Free;
    end;
  end;
  result:=URL;
end;

function DownloadHTTP(URL, TargetFile: string): boolean;
// Download file; retry if necessary.
// Deals with SourceForge download links
var
  Buffer: TMemoryStream;
begin
  result:=false;
  URL:=SourceForgeURL(URL); //Deal with sourceforge URLs
  try
    Buffer := TMemoryStream.Create;
    DownloadHTTPStream(URL, Buffer);
    Buffer.SaveToFile(TargetFile);
    result:=true;
  finally
    FreeAndNil(Buffer);
  end;
end;

Then you can download file using standard method.

...
uses httpsend
...
function DownloadHTTP(URL, TargetFile: string): boolean;
// Download file; retry if necessary.
// Deals with SourceForge download links
// Could use Synapse HttpGetBinary, but that doesn't deal
// with result codes (i.e. it happily downloads a 404 error document)
const
  MaxRetries=3;
var
  HTTPGetResult: boolean;
  HTTPSender: THTTPSend;
  RetryAttempt: integer;
begin
  result:=false;
  RetryAttempt:=1;
  //Optional: mangling of Sourceforge file download URLs; see below.
  URL:=SourceForgeURL(URL); //Deal with sourceforge URLs
  HTTPSender:=THTTPSend.Create;
  try
    try
      // Try to get the file
      HTTPGetResult:=HTTPSender.HTTPMethod('GET', URL);
      while (HTTPGetResult=false) and (RetryAttempt<MaxRetries) do
      begin
        sleep(500*RetryAttempt);
        HTTPGetResult:=HTTPSender.HTTPMethod('GET', URL);
        RetryAttempt:=RetryAttempt+1;
      end;
      // If we have an answer from the server, check if the file
      // was sent to us.
      case HTTPSender.Resultcode of
        100..299:
          begin
            with TFileStream.Create(TargetFile,fmCreate or fmOpenWrite) do
            try
              Seek(0, soFromBeginning);
              CopyFrom(HTTPSender.Document, 0);
            finally
              Free;
            end;
            result:=true;
          end; //informational, success
        300..399: result:=false; //redirection. Not implemented, but could be.
        400..499: result:=false; //client error; 404 not found etc
        500..599: result:=false; //internal server error
        else result:=false; //unknown code
      end;
    except
      // We don't care for the reason for this error; the download failed.
      result:=false;
    end;
  finally
    HTTPSender.Free;
  end;
end;

See also