A number of improvements based on a discussion with Chris McCafferty
<christopher.mccafferty@csg.ch>:

Add javascript: and telnet: to the types of URLs we ignore.

Add support for several additional URL-valued attributes on the BODY,
FRAME, IFRAME, LINK, OBJECT, and SCRIPT elements.
This commit is contained in:
Fred Drake 2001-04-04 17:47:25 +00:00
parent 33d2b84b2c
commit f3186e8242

View file

@ -481,8 +481,9 @@ def getpage(self, url_pair):
if self.name_table.has_key(url):
return self.name_table[url]
if url[:7] == 'mailto:' or url[:5] == 'news:':
self.note(1, " Not checking mailto/news URL")
scheme = urllib.splittype(url)
if scheme in ('mailto', 'news', 'javascript', 'telnet'):
self.note(1, " Not checking %s URL" % scheme)
return None
isint = self.inroots(url)
@ -792,10 +793,31 @@ def end_a(self): pass
def do_area(self, attributes):
    """Handle an AREA tag by forwarding its attributes to link_attr().

    'href' is named as the attribute of interest.  link_attr() is defined
    elsewhere; presumably it records the URL for checking -- confirm there.
    """
    self.link_attr(attributes, 'href')
def do_body(self, attributes):
    """Handle a BODY tag by forwarding its attributes to link_attr().

    'background' is named as the URL-valued attribute of interest.
    """
    self.link_attr(attributes, 'background')
def do_img(self, attributes):
    """Handle an IMG tag by forwarding its attributes to link_attr().

    Both 'src' and 'lowsrc' are named as attributes of interest.
    """
    self.link_attr(attributes, 'src', 'lowsrc')
def do_frame(self, attributes):
    """Handle a FRAME tag by forwarding its attributes to link_attr().

    Both 'src' and 'longdesc' are named as URL-valued attributes of
    interest.
    """
    self.link_attr(attributes, 'src', 'longdesc')
def do_iframe(self, attributes):
    """Handle an IFRAME tag by forwarding its attributes to link_attr().

    Both 'src' and 'longdesc' are named as URL-valued attributes of
    interest, the same pair used for FRAME.
    """
    self.link_attr(attributes, 'src', 'longdesc')
def do_link(self, attributes):
    """Handle a LINK tag, forwarding its href only for style sheets.

    attributes is a sequence of (name, value) pairs.  Each "rel"
    attribute is lower-cased and split on whitespace; if the result is
    exactly ["stylesheet"] or ["alternate", "stylesheet"], the whole
    attribute list is passed to link_attr() with "href" as the attribute
    of interest.  LINK tags with any other (or no) "rel" value are
    ignored.
    """
    for name, value in attributes:
        if name != "rel":
            continue
        # String methods replace the deprecated string.lower/string.split
        # module functions; the result is the same.
        parts = value.lower().split()
        if parts in (["stylesheet"], ["alternate", "stylesheet"]):
            self.link_attr(attributes, "href")
            # One matching rel is enough; never forward href twice.
            break
def do_object(self, attributes):
    """Handle an OBJECT tag by forwarding its attributes to link_attr().

    Both 'data' and 'usemap' are named as URL-valued attributes of
    interest.
    """
    self.link_attr(attributes, 'data', 'usemap')
def do_script(self, attributes):
    """Handle a SCRIPT tag by forwarding its attributes to link_attr().

    'src' is named as the URL-valued attribute of interest.
    """
    self.link_attr(attributes, 'src')
def link_attr(self, attributes, *args):