给aria2打了个补丁,支持国内多数下载网站防盗链链接下载 |
发布: 2010-05-28 21:53 |
这个补丁的作用是修改重定向时对引用(Referer)及cookie的处理,使aria2能正确下载国内多数下载网站的防盗链链接。 补丁修改的机制说明: 在30x重定向时,RFC的HTTP标准规定不能发送Cookie, 不能使用引用(Referer)。但国内多数下载站点这两者都同时使用,而且设置Cookie的域名比较复杂,这些都被作为防盗链的手段广泛使用,使这些链接只能在浏览器中下载,遵守标准的普通下载工具却无法正常下载。 针对这种情况,在aria2中修正了处理机制: 在记录cookie的时候对域名进行处理,只保留二级域名(对 .com.cn 这类后缀则保留到三级域名), 在查找cookie的时候也只使用处理后的域名进行查询。 在做redirect的时候,仍旧按照服务器的响应记录cookie, 并在redirect请求中使用这些cookie。 在做redirect的时候,使用重定向前的本次请求地址作为引用(Referer)发送给服务器。 用户在--header参数中传递的Cookie会与本次服务器设置的Cookie合并,一起用于下一次转向链接的请求。 测试网站列表: 非凡软件下载 sina下载 down.tech.sina.com.cn www.greendown.cn ... 如果您测试了其他站点,请email我。 补丁如下: aria2-1.9.2_redirect_no_cookie_no_refer.patch [code type="diff"] diff --git a/src/HttpRequest.cc b/src/HttpRequest.cc index c6c0afb..dfc981f 100644 --- a/src/HttpRequest.cc +++ b/src/HttpRequest.cc @@ -218,9 +218,10 @@ std::string HttpRequest::createRequest() builtinHds.push_back(std::make_pair("Referer:", getPreviousURI())); } if(!_cookieStorage.isNull()) { + std::string baseDomain = util::getBaseDomainName(getHost()); std::string cookiesValue; std::vector - _cookieStorage->criteriaFind(getHost(), + _cookieStorage->criteriaFind(baseDomain, // getHost(), getDir(), Time().getTime(), getProtocol() == Request::PROTO_HTTPS ? @@ -235,15 +236,25 @@ std::string HttpRequest::createRequest() } for(std::vector builtinHds.begin(), eoi = builtinHds.end(); i != eoi; ++i) { - std::vector - std::vector + // std::vector + // std::vector + std::vector + std::vector for(; j != jend; ++j) { if(util::startsWith(*j, (*i).first)) { break; } } + // hacked by liuguangzhao@users.sf.net + // if user header is not cookie, override it, or leave it there no touched. + // for this case, the best methord is combine to cookies part to one Cookie: header line if(j == jend) { strappend(requestLine, (*i).first, " ", (*i).second, A2STR::CRLF); + } else { + // combine the new cookie to header line + if (util::startsWith((*i).first, "Cookie") && !util::endsWith(*j, (*i).second)) { + (*j) += ";" + (*i).second; + } } } // append additional headers given by user. 
diff --git a/src/HttpResponse.cc b/src/HttpResponse.cc index 6957fe9..79de4bd 100644 --- a/src/HttpResponse.cc +++ b/src/HttpResponse.cc @@ -116,11 +116,13 @@ std::string HttpResponse::determinFilename() const void HttpResponse::retrieveCookie() { + // hacked by liuguangzhao@users.sf.net + std::string baseDomain = util::getBaseDomainName(httpRequest->getHost()); std::vector for(std::vector itr != eoi; ++itr) { httpRequest->getCookieStorage()->parseAndStore(*itr, - httpRequest->getHost(), + baseDomain, // httpRequest->getHost(), httpRequest->getDir()); } } diff --git a/src/Request.cc b/src/Request.cc index 9fbba5a..4204c0a 100644 --- a/src/Request.cc +++ b/src/Request.cc @@ -125,7 +125,8 @@ void Request::setReferer(const std::string& uri) } bool Request::redirectUri(const std::string& uri) { - _previousUri = A2STR::NIL; + // _previousUri = A2STR::NIL; + _previousUri = _uri; // hacked by liuguangzhao@users.sf.net _supportsPersistentConnection = true; ++_redirectCount; std::string redirectedUri; diff --git a/src/util.cc b/src/util.cc index 58a37d4..bb10bdc 100644 --- a/src/util.cc +++ b/src/util.cc @@ -1305,6 +1305,48 @@ void removeMetalinkContentTypes(const SharedHandle } } +// hacked by liuguangzhao@users.sf.net +/* + only leave the last 2 or three domain parts + */ + std::string getBaseDomainName(const std::string &s) + { + std::string baseDomain; + std::string ts = s; + if (util::isNumericHost(s)) { + return s; + } + size_t colonPos = ts.find(':'); + if (colonPos != std::string::npos) { + ts.resize(colonPos); + } + + std::vector + std::string delims("."); + util::split(ts, std::back_inserter(domParts), delims); + + int minDomParts = 2; + int domLevel = domParts.size(); + if (domParts.at(domLevel - 1) == "cn" + || domParts.at(domLevel - 1) == "cc" + || domParts.at(domLevel - 1) == "us") { + if (domParts.at(domLevel - 2) == "org" + || domParts.at(domLevel - 2) == "com" + || domParts.at(domLevel - 2) == "net" + || domParts.at(domLevel - 2) == "info") { + minDomParts 
= 3; + } + } + + baseDomain = domParts.at(domLevel - 2) + "." + domParts.at(domLevel - 1); + if (minDomParts == 3) { + baseDomain = domParts.at(domLevel - 3) + "." + baseDomain; + } + + return baseDomain; + } + + } // namespace util } // namespace aria2 diff --git a/src/util.h b/src/util.h index 9e1c2a5..01b6a1c 100644 --- a/src/util.h +++ b/src/util.h @@ -395,6 +395,9 @@ bool inSameCidrBlock(const std::string& ip1, const std::string& ip2, int bits); void removeMetalinkContentTypes(const SharedHandle + // hacked by liuguangzhao@users.sf.net + std::string getBaseDomainName(const std::string &s); + } // namespace util } // namespace aria2 [/code] |
原文: http://qtchina.tk/?q=node/435 |
Powered by zexport
|