为什么我无法在我的Delphi程序中获取AOL验证码图像?
问题
新的演示代码:
我正在尝试从AOL获取验证码图像,并且我不断出现错误418。
unit imageunit;
///
/// https://new.aol.com/productsweb/
///
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls, IdIOHandler, IdIOHandlerSocket, IdIOHandlerStack, IdSSL,
IdSSLOpenSSL, IdIntercept, IdZLibCompressorBase, IdCompressorZLib,
IdCookieManager, IdBaseComponent, IdComponent, IdTCPConnection, IdTCPClient,
IdHTTP,jpeg,GIFImg, ExtCtrls, PerlRegEx;
type
// Main form of the captcha demo. All fields below are design-time components
// streamed from the accompanying form (DFM) description.
TForm2 = class(TForm)
// HTTP client used by Button1Click for both the page request and the image
// request; the form description wires it to the SSL IO handler, the cookie
// manager, the compressor and the connection intercept declared below.
IdHTTP1: TIdHTTP;
IdCookieManager1: TIdCookieManager;
IdCompressorZLib1: TIdCompressorZLib;
IdConnectionIntercept1: TIdConnectionIntercept;
IdSSLIOHandlerSocketOpenSSL1: TIdSSLIOHandlerSocketOpenSSL;
Panel1: TPanel;
// Receives the decoded captcha picture in Button1Click.
Image1: TImage;
Panel2: TPanel;
// Its OnClick event is bound to Button1Click in the form description.
Button1: TButton;
// NOTE(review): not referenced by the code visible here; getaimcaptchaimage
// creates its own TPerlRegEx instance.
PerlRegEx1: TPerlRegEx;
// Receives the text of the downloaded page in Button1Click.
Memo1: TMemo;
// Downloads the AOL sign-up page, extracts the captcha URL and shows the image.
procedure Button1Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;
var
Form2: TForm2;
implementation
{$R *.dfm}
// Extracts the query part of the captcha image URL from the page HTML.
//
// data   : HTML text to search.
// Result : the text that follows "/productsweb/WordVerImage" inside the first
//          matching <img src="..."> attribute, INCLUDING the leading '?'
//          (for example '?20890843'); an empty string when nothing matches.
//          The caller appends this value directly to '.../WordVerImage'.
function getaimcaptchaimage(data:string):string;
var
  Regex: TPerlRegEx;
begin
  // Always return a defined value, even when the page contains no captcha tag.
  Result := '';
  Regex := TPerlRegEx.Create(nil);
  try
    // The '?' is escaped and kept inside the capture group on purpose. The
    // previous pattern left it unescaped, which made the preceding 'e'
    // optional and only captured the '?' by accident via the lazy group.
    Regex.RegEx := '<img src="/productsweb/WordVerImage(\?.*?)"';
    Regex.Options := [preCaseless];
    Regex.Subject := data;
    if Regex.Match and (Regex.SubExpressionCount >= 1) then
      Result := Regex.SubExpressions[1];
  finally
    // Created with a nil owner, so it has to be released explicitly.
    Regex.Free;
  end;
end;
// Downloads the AOL sign-up page, extracts the captcha image URL from it,
// downloads that image with the same HTTP client and shows it in Image1.
//
// Any failure (HTTP error such as "418 unused", or a response that is not a
// valid JPEG) is reported through a message dialog; Image1 is only updated
// when the whole sequence succeeded.
procedure TForm2.Button1Click(Sender: TObject);
var
  JPI: TJPEGImage;
  streamdata: TMemoryStream;
  SStream: TStringStream;
  pagehtml: string;
  website: string;
begin
  // Pre-set to nil so the single finally block can free whatever was created.
  JPI := nil;
  streamdata := nil;
  SStream := TStringStream.Create('');
  try
    streamdata := TMemoryStream.Create;
    try
      // NOTE(review): according to the discussion of this code the image
      // request depends on cookies issued for this first request - confirm
      // that the cookie manager actually receives them.
      IdHTTP1.Get('https://new.aol.com/productsweb/', SStream);
      // Decode once and reuse for both the memo and the URL extraction.
      pagehtml := UTF8ToWideString(SStream.DataString);
      Memo1.Text := pagehtml;

      // Example results:
      //   https://new.aol.com/productsweb/WordVerImage?20890843
      //   https://new.aol.com/productsweb/WordVerImage?91868359
      website := 'https://new.aol.com/productsweb/WordVerImage' +
        getaimcaptchaimage(pagehtml);
      // Use this instance rather than the global Form2 variable.
      Caption := website;

      IdHTTP1.Get(website, streamdata);

      // Decode only after a successful download; previously the empty stream
      // was decoded (and assigned to Image1) even when the request had failed.
      streamdata.Position := 0;
      JPI := TJPEGImage.Create;
      JPI.LoadFromStream(streamdata);
      Image1.Picture.Assign(JPI);
    except
      on E: Exception do
        MessageDlg('Exception: ' + E.Message, mtError, [mbOK], 0);
    end;
  finally
    JPI.Free;
    streamdata.Free;
    // The string stream was never released before.
    SStream.Free;
  end;
end;
end.
窗体(DFM):
object Form2: TForm2
Left = 0
Top = 0
Caption = 'Form2'
ClientHeight = 247
ClientWidth = 480
Color = clBtnFace
Font.Charset = DEFAULT_CHARSET
Font.Color = clWindowText
Font.Height = -11
Font.Name = 'Tahoma'
Font.Style = []
OldCreateOrder = False
PixelsPerInch = 96
TextHeight = 13
object Panel1: TPanel
Left = 0
Top = 41
Width = 480
Height = 206
Align = alClient
TabOrder = 0
object Image1: TImage
Left = 1
Top = 1
Width = 478
Height = 115
Align = alClient
ExplicitLeft = 5
ExplicitTop = 17
ExplicitWidth = 200
ExplicitHeight = 70
end
object Memo1: TMemo
Left = 1
Top = 116
Width = 478
Height = 89
Align = alBottom
TabOrder = 0
ExplicitLeft = 80
ExplicitTop = 152
ExplicitWidth = 185
end
end
object Panel2: TPanel
Left = 0
Top = 0
Width = 480
Height = 41
Align = alTop
TabOrder = 1
object Button1: TButton
Left = 239
Top = 6
Width = 75
Height = 25
Caption = 'Button1'
TabOrder = 0
OnClick = Button1Click
end
end
object IdHTTP1: TIdHTTP
Intercept = IdConnectionIntercept1
IOHandler = IdSSLIOHandlerSocketOpenSSL1
MaxAuthRetries = 100
AllowCookies = True
HandleRedirects = True
RedirectMaximum = 100
ProxyParams.BasicAuthentication = False
ProxyParams.ProxyPort = 0
Request.ContentLength = -1
Request.Accept =
'image/gif, image/jpeg, image/pjpeg, image/pjpeg, application/x-s' +
'hockwave-flash, application/cade, application/xaml+xml, applicat' +
'ion/vnd.ms-xpsdocument, application/x-ms-xbap, application/x-ms-' +
'application, */*'
Request.BasicAuthentication = False
Request.Referer = 'http://www.yahoo.com'
Request.UserAgent =
'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/201001' +
'22 firefox/3.6.1'
HTTPOptions = [hoForceEncodeParams]
CookieManager = IdCookieManager1
Compressor = IdCompressorZLib1
Left = 40
Top = 160
end
object IdCookieManager1: TIdCookieManager
Left = 360
Top = 136
end
object IdCompressorZLib1: TIdCompressorZLib
Left = 408
Top = 56
end
object IdConnectionIntercept1: TIdConnectionIntercept
Left = 304
Top = 72
end
object IdSSLIOHandlerSocketOpenSSL1: TIdSSLIOHandlerSocketOpenSSL
Intercept = IdConnectionIntercept1
MaxLineAction = maException
Port = 0
DefaultPort = 0
SSLOptions.Mode = sslmUnassigned
SSLOptions.VerifyMode = []
SSLOptions.VerifyDepth = 0
Left = 192
Top = 136
end
object PerlRegEx1: TPerlRegEx
Options = []
Left = 120
Top = 56
end
end
如果你去 https://new.aol.com/productsweb/您会注意到验证码图像具有类似的URL https://new.aol.com/productsweb/wordverimage?91868359
我将该URL放在编辑框中,并获得错误。
此代码有什么问题?
解决方案
在我以前的一个项目中,我需要从网页上读取验证码。我是借助 Embedded Web Browser 组件的缓存例程实现的。也就是说,只要您能在代码中读取 Internet 临时文件,就可以从缓存中取得图像数据。下面附上一段简单的示例代码:
TEmbeddedWebBrowser 单元中声明了 GetCachedFileFromURL 和 ClearAllEntries 这两个例程。为了减小 EXE 的体积,我在自己的方案中只复制了需要的那部分代码。您也可以直接使用该组件最新的源码,该组件是开源的。
uses
WinInet;
// Looks up a URL in the WinINet URL cache.
//
// strUL        : source URL to look for; compared for exact equality with the
//                source URL name of each cache entry.
// strLocalFile : receives the local file name of the matching entry; left
//                untouched when no entry matches.
// Result       : True when a matching entry was found, False otherwise
//                (including an empty cache or an enumeration error).
function GetCachedFileFromURL(strUL: string; var strLocalFile: string): Boolean;
var
  lpEntryInfo: PInternetCacheEntryInfo;
  hCacheDir: THandle;
  dwEntrySize: LongWord;

  // Compares the entry currently held in lpEntryInfo with strUL and copies
  // its local file name to strLocalFile on a match.
  function EntryMatches: Boolean;
  begin
    Result := strUL = lpEntryInfo^.lpszSourceUrlName;
    if Result then
      strLocalFile := lpEntryInfo^.lpszLocalFileName;
  end;

begin
  Result := False;

  // Probe call with no buffer: it fails and reports the size required for
  // the first entry in dwEntrySize.
  dwEntrySize := 0;
  FindFirstUrlCacheEntry(nil, TInternetCacheEntryInfo(nil^), dwEntrySize);
  if dwEntrySize = 0 then
    Exit; // nothing to enumerate

  GetMem(lpEntryInfo, dwEntrySize);
  try
    hCacheDir := FindFirstUrlCacheEntry(nil, lpEntryInfo^, dwEntrySize);
    if hCacheDir <> 0 then
      Result := EntryMatches;
  finally
    FreeMem(lpEntryInfo);
  end;

  // Without a valid enumeration handle there is nothing left to walk.
  if hCacheDir = 0 then
    Exit;

  try
    while not Result do
    begin
      // Probe for the size of the next entry.
      dwEntrySize := 0;
      FindNextUrlCacheEntry(hCacheDir, TInternetCacheEntryInfo(nil^), dwEntrySize);
      // ERROR_NO_MORE_ITEMS ends the enumeration; any other unexpected error
      // also stops it instead of looping forever.
      if GetLastError <> ERROR_INSUFFICIENT_BUFFER then
        Break;

      GetMem(lpEntryInfo, dwEntrySize);
      try
        if not FindNextUrlCacheEntry(hCacheDir, lpEntryInfo^, dwEntrySize) then
          Break;
        Result := EntryMatches;
      finally
        // Released on every path, including the match that ends the loop.
        FreeMem(lpEntryInfo);
      end;
    end;
  finally
    // The enumeration handle was never closed before.
    FindCloseUrlCache(hCacheDir);
  end;
end;
// Selects the spAll search pattern and then calls ClearAllEntries.
// NOTE(review): SearchPattern and ClearAllEntries are not declared in the
// visible code; presumably they come from the embedded web browser cache
// unit and remove every cache entry - confirm against that unit.
procedure TForm1.ClearCache();
begin
SearchPattern := spAll;
ClearAllEntries;
end;
用法
// Usage example: clears the cache, looks up the captcha URL in the URL cache
// and loads the cached file as a JPEG.
//
// Raises an exception when no cache entry exists for the URL or when the
// cached file cannot be loaded as a JPEG.
// NOTE(review): nothing visible here requests the URL between ClearCache and
// the lookup, and the decoded image is released without being displayed; a
// real program has to do both.
procedure TForm1.Button1Click(Sender: TObject);
const
  CaptchaURL = 'https://ebildirge.ssk.gov.tr/WPEB/PG';
var
  fname: string;
  jpImg: TJPEGImage;
begin
  ClearCache;
  // Create before entering try so that finally never sees an uninitialised
  // reference when the constructor fails.
  jpImg := TJPEGImage.Create;
  try
    // Fail with a clear message instead of trying to open an empty file name.
    if not GetCachedFileFromURL(CaptchaURL, fname) then
      raise Exception.CreateFmt('No cache entry found for %s', [CaptchaURL]);
    jpImg.LoadFromFile(fname);
  finally
    // The original referenced the undeclared identifier "jpgImg" here.
    FreeAndNil(jpImg);
  end;
end;
其他提示
这与 Cookie 有关。如果您在一个尚未访问过 https://new.aol.com/productsweb/ 的浏览器中直接打开验证码URL https://new.aol.com/productsweb/wordverimage?91868359 ,那么(刷新后)您会得到:
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<html><head>
<title>418 unused</title>
</head><body>
<h1>unused</h1>
<p>The server encountered an internal error or
misconfiguration and was unable to complete
your request.</p>
<p>Please contact the server administrator,
null and inform them of the time the error occurred,
and anything you might have done that may have
caused the error.</p>
<p>More information about this error may be available
in the server error log.</p>
</body></html>
但是如果你访问 https://new.aol.com/productsweb/ 首先,您将获得图像。清除cookie,然后再收到错误(尽管正如Francois指出的那样,您没有首先得到响应,然后在刷新时您会得到图像。)
Indy支持Cookie,因此您需要启用对Cookie的支持,然后先访问 productsweb 页面,或者用已知的值模拟该Cookie。
您会注意到,生成的图像不是基于仅作为参数传递的数字,而是基于cookie。有两个不同的浏览器(Chrome和Firefox),每个浏览器都有不同的cookie,然后访问相同的验证码URL,您将获得两个不同的图像。
好奇您想完成的工作。
我认为这个响应的意思是“走开”。服务器不知怎么地,通过您请求中的标头或其他内容,判断出您看起来像个机器人。也许是因为您请求的图像并不是它刚刚为您生成的。是的,很可能就是这样。如果我在浏览器中直接打开您的URL,我也会得到418。
这不是您的代码。在浏览器中尝试...
(显然,您需要从'http s''...中删除空白...)
这个网址 https://new.aol.com/productsweb/
显然需要先被请求一次,之后您才能获得验证码图像。否则您会遇到(不正确的)错误 418 Unused
.
有时我不得不尝试两次图像#,因为我第一次得到一个 420 Unused
错误...
您最好问他们,因为他们的API似乎并不稳定...
回复:关于 HTTP 418 的笑话。如果您想看看有关 HTTP 错误代码的趣事,可以参阅《网络开发人员和他的女友》。