Class for making HTTP requests easier (mainly for plugins to use). This class can check the bot proxy configuration to determine whether a proxy needs to be used, including support for per-URL proxy configuration.
Create the HttpUtil instance, associating it with Bot bot
# File lib/rbot/core/utils/httputil.rb, line 291
# bot:: the Bot instance this HttpUtil is associated with
#
# Sets up an empty response cache and the default request headers, then
# registers a timer (period 300) with the bot that calls
# remove_stale_cache unless the 'http.no_expire_cache' config value is set.
def initialize(bot)
  @bot = bot
  @cache = {}

  user_agent = "rbot http util #{$version} (#{Irc::Bot::SOURCE_URL})"
  @headers = {
    'Accept-Charset' => 'utf-8;q=1.0, *;q=0.8',
    'Accept-Encoding' => 'gzip;q=1, deflate;q=1, identity;q=0.8, *;q=0.2',
    'User-Agent' => user_agent
  }

  debug "starting http cache cleanup timer"
  @timer = @bot.timer.add(300) do
    # the config value is checked on every tick, not just at startup
    next if @bot.config['http.no_expire_cache']
    remove_stale_cache
  end
end
Clean up on HttpUtil unloading, by stopping the cache cleanup timer.
# File lib/rbot/core/utils/httputil.rb, line 307
# Called when HttpUtil is unloaded: removes the cache cleanup timer that
# was registered in initialize from the bot's timer.
def cleanup
  debug('stopping http cache cleanup timer')
  timers = @bot.timer
  timers.remove(@timer)
end
uri |
uri to query (URI object or String) |
Simple GET request, returns (if possible) response body following redirs and caching if requested, yielding the actual response(s) to the optional block. See get_response for details on the supported options
# File lib/rbot/core/utils/httputil.rb, line 635
# uri::     uri to query (URI object or String)
# options:: options hash, passed through to get_response
#
# Simple GET request. Returns the response body when the final response is
# a Net::HTTPOK or Net::HTTPPartialContent; any other response, or any
# StandardError raised while performing the request, is logged with
# +error+ and results in +nil+.
#
# Only StandardError is rescued, so that Interrupt, SystemExit and similar
# non-standard exceptions are not silently swallowed.
def get(uri, options = {}, &block) # :yields: resp
  resp = get_response(uri, options, &block)
  unless Net::HTTPOK === resp || Net::HTTPPartialContent === resp
    raise "http error: #{resp}"
  end
  resp.body
rescue StandardError => e
  error e
  nil
end
uri |
uri to query (URI object or String) |
nbytes |
number of bytes to get |
Partial GET request, returns (if possible) the first nbytes bytes of the response body, following redirs and caching if requested, yielding the actual response(s) to the optional block. See get_response for details on the supported options
# File lib/rbot/core/utils/httputil.rb, line 693
# uri::     uri to query (URI object or String)
# nbytes::  number of bytes to get (defaults to the 'http.info_bytes'
#           config value)
# options:: options hash, passed through to get; a caller-supplied :range
#           takes precedence over the one computed here
#
# Partial GET request: asks for the first +nbytes+ bytes of the body via a
# Range header and returns whatever get returns.
#
# HTTP byte ranges are inclusive on both ends, so the first +nbytes+ bytes
# are "bytes=0-(nbytes-1)". The last position is clamped to 0 so that a
# non-positive +nbytes+ still produces a syntactically valid range.
def get_partial(uri, nbytes = @bot.config['http.info_bytes'], options = {}, &block) # :yields: resp
  last_byte = [nbytes - 1, 0].max
  opts = { :range => "bytes=0-#{last_byte}" }.merge(options)
  get(uri, opts, &block)
end
uri |
URI to create a proxy for |
Return a net/http Proxy object, configured for proxying based on the bot's proxy configuration. See proxy_required for more details on this.
# File lib/rbot/core/utils/httputil.rb, line 362
# uri::     URI to create a connection object for
# options:: may carry :read_timeout and :open_timeout, overriding the
#           'http.read_timeout' / 'http.open_timeout' config values
#
# Builds a Net::HTTP object for +uri+. When 'http.use_proxy' is set, the
# proxy is taken from the http_proxy environment variable and then from
# 'http.proxy_uri' (the latter wins, even if it fails to parse, in which
# case no proxy is used). Proxy settings are only applied when
# proxy_required(uri) says so.
def get_proxy(uri, options = {})
  timeouts = {
    :read_timeout => @bot.config["http.read_timeout"],
    :open_timeout => @bot.config["http.open_timeout"]
  }.merge(options)

  # host, port, user, password -- all nil means "connect directly"
  proxy_args = [nil, nil, nil, nil]

  if @bot.config["http.use_proxy"]
    proxy = nil

    from_env = ENV['http_proxy']
    proxy = (URI.parse(from_env) rescue nil) if from_env

    from_conf = @bot.config["http.proxy_uri"]
    proxy = (URI.parse(from_conf) rescue nil) if from_conf

    if proxy
      debug "proxy is set to #{proxy.host} port #{proxy.port}"
      if proxy_required(uri)
        proxy_args = [
          proxy.host,
          proxy.port,
          @bot.config["http.proxy_user"],
          @bot.config["http.proxy_pass"]
        ]
      end
    end
  end

  http = Net::HTTP.new(uri.host, uri.port, *proxy_args)
  http.use_ssl = true if uri.scheme == "https"
  http.read_timeout = timeouts[:read_timeout]
  http.open_timeout = timeouts[:open_timeout]
  http
end
uri |
uri to query (URI object or String) |
Generic http transaction method. It will return a Net::HTTPResponse object or raise an exception
If a block is given, it will yield the response (see :yield option)
Currently supported options:
method |
request method [:get (default), :post or :head] |
open_timeout |
open timeout for the proxy |
read_timeout |
read timeout for the proxy |
cache |
should we cache results? |
yield |
if :final [default], calls the block for the response object; if :all, call the block for all intermediate redirects, too |
max_redir |
how many redirects to follow before raising an exception; if -1, don't follow redirects, just return them |
range |
make a ranged request (usually GET). accepts a string for HTTP/1.1 "Range:" header (i.e. "bytes=0-1000") |
body |
request body (usually for POST requests) |
headers |
additional headers to be set for the request. Its value must be a Hash in the form { 'Header' => 'value' } |
# File lib/rbot/core/utils/httputil.rb, line 517
# uri_or_s:: uri to query (URI object, or anything whose to_s parses as one)
# options::  see the list of supported options in the method description
#
# Generic http transaction method: builds the request, consults the cache,
# performs the request through get_proxy and hands the response to
# handle_response. Returns what handle_response returns, or raises.
#
# Exceptions raised while performing the request are logged with +error+
# and re-raised via <tt>raise e.message</tt>, i.e. as a RuntimeError that
# carries only the original message.
def get_response(uri_or_s, options = {}, &block) # :yields: resp
  uri = uri_or_s.kind_of?(URI) ? uri_or_s : URI.parse(uri_or_s.to_s)
  # only http(s) URIs are handled
  unless URI::HTTP === uri
    if uri.scheme
      raise "#{uri.scheme.inspect} URI scheme is not supported"
    else
      raise "don't know what to do with #{uri.to_s.inspect}"
    end
  end

  # defaults, overridden by whatever the caller passed
  opts = {
    :max_redir => @bot.config['http.max_redir'],
    :yield => :final,
    :cache => true,
    :method => :GET
  }.merge(options)

  resp = nil

  # the method may be given as a symbol/string (:get, "POST", ...) or as a
  # Net::HTTP request class; both are normalized through to_s.downcase
  req_class = case opts[:method].to_s.downcase.intern
              when :head, :"net::http::head"
                # -1 makes handle_response return redirects instead of following them
                opts[:max_redir] = -1
                Net::HTTP::Head
              when :get, :"net::http::get"
                Net::HTTP::Get
              when :post, :"net::http::post"
                opts[:cache] = false
                opts[:body] or raise 'post request w/o a body?'
                # only warn when the caller explicitly asked for caching
                warning "refusing to cache POST request" if options[:cache]
                Net::HTTP::Post
              else
                warning "unsupported method #{opts[:method]}, doing GET"
                Net::HTTP::Get
              end

  if req_class != Net::HTTP::Get && opts[:range]
    warning "can't request ranges for #{req_class}"
    opts.delete(:range)
  end

  # cache entries are distinguished by range, request class and full URI
  cache_key = "#{opts[:range]}|#{req_class}|#{uri.to_s}"

  if req_class != Net::HTTP::Get && req_class != Net::HTTP::Head
    if opts[:cache]
      warning "can't cache #{req_class.inspect} requests, working w/o cache"
      opts[:cache] = false
    end
  end

  debug "get_response(#{uri}, #{opts.inspect})"

  # looked up even when caching is off: the entry is still needed below to
  # handle a 304 reply or to be dropped on an http error
  cached = @cache[cache_key]

  if opts[:cache] && cached
    debug "got cached"
    if !cached.expired?
      debug "using cached"
      cached.use
      # NOTE: this early return is outside the begin/rescue below, so
      # exceptions from handle_response propagate unchanged on this path
      return handle_response(uri, cached.response, opts, &block)
    end
  end

  # per-request headers are layered on top of the instance defaults
  headers = @headers.dup.merge(opts[:headers] || {})
  headers['Range'] = opts[:range] if opts[:range]
  headers['Authorization'] = opts[:auth_head] if opts[:auth_head]

  # an expired cache entry gets to add its own headers to the request
  # (presumably the conditional-request validators -- see CachedObject)
  if opts[:cache] && cached && (req_class == Net::HTTP::Get)
    cached.setup_headers headers
  end

  req = req_class.new(uri.request_uri, headers)
  if uri.user && uri.password
    req.basic_auth(uri.user, uri.password)
    # remembered in opts so that it is passed along when following redirects
    opts[:auth_head] = req['Authorization']
  end
  req.body = opts[:body] if req_class == Net::HTTP::Post
  debug "prepared request: #{req.to_hash.inspect}"

  begin
    get_proxy(uri, opts).start do |http|
      http.request(req) do |resp|
        # record where this response came from; handle_response uses it to
        # guess a missing content type
        resp['x-rbot-location'] = uri.to_s
        if Net::HTTPNotModified === resp
          debug "not modified"
          begin
            cached.revalidate(resp)
          rescue Exception => e
            error e
          end
          debug "reusing cached"
          # NOTE(review): if there is no cache entry here, +cached+ is nil
          # and this line raises; it then ends up in the outer rescue
          resp = cached.response
        elsif Net::HTTPServerError === resp || Net::HTTPClientError === resp
          debug "http error, deleting cached obj" if cached
          @cache.delete(cache_key)
        end

        begin
          # returns from get_response itself, from inside both blocks
          return handle_response(uri, resp, opts, &block)
        ensure
          # runs after handle_response, whether it returned or raised;
          # errors from CachedObject.maybe_new are ignored (rescue nil)
          if cached = CachedObject.maybe_new(resp) rescue nil
            debug "storing to cache"
            @cache[cache_key] = cached
          end
        end
      end
    end
  rescue Exception => e
    error e
    # re-raised as a RuntimeError: the original class and backtrace are lost
    raise e.message
  end
end
Internal method used to handle response resp received when making a request for URI uri.
It follows redirects, optionally yielding them if option :yield is :all.
Also yields and returns the final resp.
# File lib/rbot/core/utils/httputil.rb, line 407
# uri::  the URI the request was made for
# resp:: the response received
# opts:: the options hash built by get_response (:max_redir, :method,
#        :yield, :headers are read here)
#
# Internal method used to handle response +resp+ received when making a
# request for URI +uri+.
#
# Redirections that carry a Location header are followed by calling
# get_response again (unless :max_redir is negative); they are yielded
# first if option :yield is :all. Raises 'Too many redirections' when
# :max_redir has been used up. A redirected POST is retried as a GET.
#
# For the final response, +body+ is re-aliased to +cooked_body+, a missing
# Content-Type is guessed from the extension of the request location, and
# the response is yielded (or its raw body read) and returned.
def handle_response(uri, resp, opts, &block) # :yields: resp
  if Net::HTTPRedirection === resp && opts[:max_redir] >= 0
    if resp.key?('location')
      raise 'Too many redirections' if opts[:max_redir] <= 0
      yield resp if opts[:yield] == :all && block_given?
      # some servers actually provide unescaped location, e.g.
      # http://ulysses.soup.io/post/60734021/Image%20curve%20ball
      # rediects to something like
      # http://ulysses.soup.io/post/60734021/Image curve ball?sessid=8457b2a3752085cca3fb1d79b9965446
      # causing the URI parser to (obviously) complain. We cannot just
      # escape blindly, as this would make a mess of already-escaped
      # locations, so we only do it if the URI.parse fails
      loc = resp['location']
      escaped = false
      debug "redirect location: #{loc.inspect}"
      begin
        new_loc = URI.join(uri.to_s, loc) rescue URI.parse(loc)
      rescue
        # second failure, after escaping: give up with the parser's error
        raise if escaped

        # URI.escape was removed in Ruby 3.0; fall back to the RFC 2396
        # parser, which provides the same escaping
        loc = if URI.respond_to?(:escape)
                URI.escape(loc)
              elsif defined?(URI::RFC2396_PARSER)
                URI::RFC2396_PARSER.escape(loc)
              else
                URI::DEFAULT_PARSER.escape(loc)
              end
        escaped = true
        debug "escaped redirect location: #{loc.inspect}"
        retry
      end
      new_opts = opts.dup
      new_opts[:max_redir] -= 1
      case opts[:method].to_s.downcase.intern
      when :post, :"net::http::post"
        new_opts[:method] = :get
      end
      if resp['set-cookie']
        debug "set cookie request for #{resp['set-cookie']}"
        # first "name=value" pair is the cookie, the rest are its attributes
        cookie, cookie_flags = (resp['set-cookie'] + '; ').split('; ', 2)
        domain = uri.host
        cookie_flags.scan(/(\S+)=(\S+);/) { |key, val|
          if key.intern == :domain
            domain = val
            break
          end
        }
        debug "cookie domain #{domain} / #{new_loc.host}"
        # NOTE(review): plain suffix comparison, so "example.com" also
        # matches a host such as "notexample.com"
        if new_loc.host.rindex(domain) == new_loc.host.length - domain.length
          debug "setting cookie"
          # opts.dup is shallow: build a new headers hash instead of
          # writing the cookie into the caller's one
          new_opts[:headers] = (new_opts[:headers] || {}).merge('Cookie' => cookie)
        else
          debug "cookie is for another domain, ignoring"
        end
      end
      debug "following the redirect to #{new_loc}"
      return get_response(new_loc, new_opts, &block)
    else
      warning ":| redirect w/o location?"
    end
  end

  # from here on resp is the final response: make +body+ return the
  # cooked body
  class << resp
    undef_method :body
    alias :body :cooked_body
  end

  unless resp['content-type']
    debug "No content type, guessing"
    resp['content-type'] =
      case resp['x-rbot-location']
      when /\.xhtml$/
        # must be tested before the html pattern below, which also matches it
        'application/xhtml+xml'
      when /\.[a-z]?html?$/
        # .htm, .html and one-letter variants such as .shtml
        'text/html'
      when /\.xml$/
        'application/xml'
      when /\.(gif|png|jpe?g|jp2|tiff?)$/
        "image/#{$1.sub(/^jpg$/, 'jpeg').sub(/^tif$/, 'tiff')}"
      else
        # NOTE(review): the registered type is application/octet-stream;
        # left as is since callers may compare against this exact string
        'application/octetstream'
      end
  end

  if block_given?
    yield(resp)
  else
    # Net::HTTP wants us to read the whole body here
    resp.raw_body
  end
  return resp
end
uri |
uri to query (URI object or String) |
Simple HEAD request, returns (if possible) response head following redirs and caching if requested, yielding the actual response(s) to the optional block. See get_response for details on the supported options
# File lib/rbot/core/utils/httputil.rb, line 653
# uri::     uri to query (URI object or String)
# options:: options hash, passed through to get_response; :method
#           defaults to :head
#
# Simple HEAD request. Returns the response object obtained from
# get_response, whatever its status (no status check is performed here),
# or +nil+ if a StandardError was raised, in which case the error is
# logged with +error+.
#
# Only StandardError is rescued, so that Interrupt, SystemExit and similar
# non-standard exceptions are not silently swallowed.
def head(uri, options = {}, &block) # :yields: resp
  opts = { :method => :head }.merge(options)
  get_response(uri, opts, &block)
rescue StandardError => e
  error e
  nil
end
uri |
uri to query (URI object or String) |
data |
body of the POST |
Simple POST request, returns (if possible) response following redirs and caching if requested, yielding the response(s) to the optional block. See get_response for details on the supported options
# File lib/rbot/core/utils/httputil.rb, line 673
# uri::     uri to query (URI object or String)
# data::    body of the POST
# options:: options hash, passed through to get_response; :method, :body
#           and :cache default to :post, +data+ and false
#
# Simple POST request. Returns the response object when it is a
# Net::HTTPOK or Net::HTTPCreated; any other response, or any
# StandardError raised while performing the request, is logged with
# +error+ and results in +nil+.
#
# Only StandardError is rescued, so that Interrupt, SystemExit and similar
# non-standard exceptions are not silently swallowed.
def post(uri, data, options = {}, &block) # :yields: resp
  opts = { :method => :post, :body => data, :cache => false }.merge(options)
  resp = get_response(uri, opts, &block)
  raise 'http error' unless Net::HTTPOK === resp || Net::HTTPCreated === resp
  resp
rescue StandardError => e
  error e
  nil
end
This method checks if a proxy is required to access uri, by looking at the values of config values http.proxy_include and http.proxy_exclude.
Each of these config values, if set, should be a list of regular expression patterns that the server name and its IP addresses are checked against.
# File lib/rbot/core/utils/httputil.rb, line 318
# uri:: the URI that is about to be accessed
#
# Checks whether a proxy is required to access +uri+, by looking at the
# config values 'http.proxy_exclude' and 'http.proxy_include'. Each is a
# list of patterns (compiled with Regexp.new) that are matched against the
# host name of +uri+ and against every address it resolves to.
#
# Returns true when both lists are empty. Otherwise the answer starts as
# true, becomes false if any exclude pattern matches, and becomes true
# again if any include pattern matches (include wins over exclude).
def proxy_required(uri)
  excludes = @bot.config["http.proxy_exclude"]
  includes = @bot.config["http.proxy_include"]
  return true if excludes.empty? && includes.empty?

  candidates = [uri.host]
  begin
    candidates.concat Resolv.getaddresses(uri.host)
  rescue StandardError
    # a failed lookup is not fatal: the host name alone is still checked
    warning "couldn't resolve host #{uri.host}"
  end

  # true if any of +patterns+ matches any candidate; all patterns are
  # compiled up front so that a malformed one is always reported
  matches_any = lambda do |patterns|
    patterns.map { |pat| Regexp.new(pat) }.any? do |re|
      candidates.any? { |item| re.match(item) }
    end
  end

  use_proxy = true
  use_proxy = false if !excludes.empty? && matches_any.call(excludes)
  use_proxy = true if !includes.empty? && matches_any.call(includes)

  debug "using proxy for uri #{uri}?: #{use_proxy}"
  use_proxy
end
# File lib/rbot/core/utils/httputil.rb, line 698
# Drops from the cache every entry that has not been used for more than
# 'http.expire_time' minutes or that was first used more than
# 'http.max_cache_time' minutes ago. Failures while pruning are logged
# with +error+ and otherwise ignored.
def remove_stale_cache
  debug "Removing stale cache"
  now = Time.now
  # config values are in minutes, Time differences in seconds
  idle_limit = @bot.config['http.expire_time'] * 60
  age_limit = @bot.config['http.max_cache_time'] * 60
  debug "#{@cache.size} pages before"
  begin
    @cache.delete_if do |_key, entry|
      next true if now - entry.last_used > idle_limit
      now - entry.first_used > age_limit
    end
  rescue => e
    error "Failed to remove stale cache: #{e.pretty_inspect}"
  end
  debug "#{@cache.size} pages after"
end
Generated with the Darkfish Rdoc Generator 2.