# HTTP client class # # See the following URL for a description of the HTTP/1.0 protocol: # http://www.w3.org/hypertext/WWW/Protocols/ # (I actually implemented it from a much earlier draft.) # # Example: # # >>> from httplib import HTTP # >>> h = HTTP('www.python.org') # >>> h.putrequest('GET', '/index.html') # >>> h.putheader('Accept', 'text/html') # >>> h.putheader('Accept', 'text/plain') # >>> h.endheaders() # >>> errcode, errmsg, headers = h.getreply() # >>> if errcode == 200: # ... f = h.getfile() # ... print f.read() # Print the raw HTML # ... # # Python Language Home Page # [...many more lines...] # >>> # # Note that an HTTP object is used for a single request -- to issue a # second request to the same server, you create a new HTTP object. # (This is in accordance with the protocol, which uses a new TCP # connection for each request.) import os import socket import string import regex import regsub import mimetools HTTP_VERSION = 'HTTP/1.0' HTTP_PORT = 80 replypat = regsub.gsub('\\.', '\\\\.', HTTP_VERSION) + \ '[ \t]+\([0-9][0-9][0-9]\)\(.*\)' replyprog = regex.compile(replypat) class HTTP: def __init__(self, host = '', port = 0): self.debuglevel = 0 self.file = None if host: self.connect(host, port) def set_debuglevel(self, debuglevel): self.debuglevel = debuglevel def connect(self, host, port = 0): if not port: i = string.find(host, ':') if i >= 0: host, port = host[:i], host[i+1:] try: port = string.atoi(port) except string.atoi_error: raise socket.error, "nonnumeric port" if not port: port = HTTP_PORT self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) if self.debuglevel > 0: print 'connect:', (host, port) self.sock.connect(host, port) def send(self, str): if self.debuglevel > 0: print 'send:', `str` self.sock.send(str) def putrequest(self, request, selector): if not selector: selector = '/' str = '%s %s %s\r\n' % (request, selector, HTTP_VERSION) self.send(str) def putheader(self, header, *args): str = '%s: %s\r\n' % (header, string.joinfields(args,'\r\n\t')) self.send(str) def endheaders(self): self.send('\r\n') def getreply(self): self.file = self.sock.makefile('rb') self.sock = None line = self.file.readline() if self.debuglevel > 0: print 'reply:', `line` if replyprog.match(line) < 0: self.headers = None return -1, line, self.headers errcode, errmsg = replyprog.group(1, 2) errcode = string.atoi(errcode) errmsg = string.strip(errmsg) self.headers = mimetools.Message(self.file, 0) return errcode, errmsg, self.headers def getfile(self): return self.file def close(self): if self.file: self.file.close() self.file = None def test(): import sys import getopt opts, args = getopt.getopt(sys.argv[1:], 'd') dl = 0 for o, a in opts: if o == '-d': dl = dl + 1 host = 'www.python.org' selector = '/' if args[0:]: host = args[0] if args[1:]: selector = args[1] h = HTTP() h.set_debuglevel(dl) h.connect(host) h.putrequest('GET', selector) h.endheaders() errcode, errmsg, headers = h.getreply() print 'errcode =', errcode print 'errmsg =', errmsg print if headers: for header in headers.headers: print string.strip(header) print print h.getfile().read() if __name__ == '__main__': test()