(original) (raw)

--- ../feedparser3/feedparser.py 2007-09-01 12:28:54.000000000 -0400 +++ feedparser.py 2007-09-01 18:58:03.000000000 -0400 @@ -239,9 +239,9 @@ realkey = self.keymap.get(key, key) if isinstance(realkey, list): for k in realkey: - if UserDict.has_key(self, k): + if k in self: return UserDict.__getitem__(self, k) - if UserDict.has_key(self, key): + if key in self: return UserDict.__getitem__(self, key) return UserDict.__getitem__(self, realkey) @@ -287,9 +287,6 @@ else: return self.__setitem__(key, value) - def __contains__(self, key): - return key in self - def zopeCompatibilityHack(): global FeedParserDict del FeedParserDict @@ -323,8 +320,8 @@ ) import string _ebcdic_to_ascii_map = string.maketrans( \ - ''.join(map(chr, range(256))), ''.join(map(chr, emap))) - return s.translate(_ebcdic_to_ascii_map) + bytes(range(256)), bytes(emap)) + return s.translate(_ebcdic_to_ascii_map).decode('iso-8859-1') _cp1252 = { chr(128): chr(8364), # euro sign @@ -611,7 +608,7 @@ else: try: name2codepoint[ref] except KeyError: text = '&%s;' % ref - else: text = chr(name2codepoint[ref]).encode('utf-8') + else: text = chr(name2codepoint[ref]) self.elementstack[-1][2].append(text) def handle_data(self, text, escape=1): @@ -719,7 +716,7 @@ # decode base64 content if base64 and self.contentparams.get('base64', 0): try: - output = base64.decodestring(output) + output = base64.decodestring(output.encode('utf-8')) except binascii.Error: pass except binascii.Incomplete: @@ -1636,10 +1633,10 @@ data = re.compile(r'', self._shorttag_replace, data) # bug [ 1399464 ] Bad regexp for _shorttag_replace data = re.sub(r'<([^<\s]+?)\s*/>', self._shorttag_replace, data) + if self.encoding and not isinstance(data, type('')): + data = data.decode(self.encoding) data = data.replace(''', "'") data = data.replace('"', '"') - if self.encoding and isinstance(data, type('')): - data = data.encode(self.encoding) sgmllib.SGMLParser.feed(self, data) sgmllib.SGMLParser.close(self) @@ -1668,7 +1665,7 @@ value = str(value, self.encoding) except: value = str(value, 'iso-8859-1') - uattrs.append((str(key, self.encoding), value)) + uattrs.append((key, value)) strattrs = ''.join([' %s="%s"' % (key, value) for key, value in uattrs]) if self.encoding: try: @@ -2666,7 +2663,7 @@ # try to open with native open function (if url_file_stream_or_string is a filename) try: - return open(url_file_stream_or_string) + return open(url_file_stream_or_string,'rb') except: pass @@ -2705,9 +2702,7 @@ + r'(:(?P\d{2}(\.\d*)?))?' + r'(?P[+-](?P\d{2})(:(?P\d{2}))?|Z)?)?' for tmpl in _iso8601_tmpl] -del tmpl _iso8601_matches = [re.compile(regex).match for regex in _iso8601_re] -del regex def _parse_date_iso8601(dateString): '''Parse a variety of ISO-8601-compatible formats like 20040105''' m = None @@ -3086,7 +3081,7 @@ if len(date9tuple) != 9: if _debug: sys.stderr.write('date handler function must return 9-tuple\n') raise ValueError - map(int, date9tuple) + list(map(int, date9tuple)) return date9tuple except Exception as e: if _debug: sys.stderr.write('%s raised %s\n' % (handler.__name__, repr(e))) @@ -3237,35 +3232,35 @@ ''' if _debug: sys.stderr.write('entering _toUTF8, trying encoding %s\n' % encoding) # strip Byte Order Mark (if present) - if (len(data) >= 4) and (data[:2] == '\xfe\xff') and (data[2:4] != '\x00\x00'): + if (len(data) >= 4) and (data[:2] == b'\xfe\xff') and (data[2:4] != b'\x00\x00'): if _debug: sys.stderr.write('stripping BOM\n') if encoding != 'utf-16be': sys.stderr.write('trying utf-16be instead\n') encoding = 'utf-16be' data = data[2:] - elif (len(data) >= 4) and (data[:2] == '\xff\xfe') and (data[2:4] != '\x00\x00'): + elif (len(data) >= 4) and (data[:2] == b'\xff\xfe') and (data[2:4] != b'\x00\x00'): if _debug: sys.stderr.write('stripping BOM\n') if encoding != 'utf-16le': sys.stderr.write('trying utf-16le instead\n') encoding = 'utf-16le' data = data[2:] - elif data[:3] == '\xef\xbb\xbf': + elif data[:3] == b'\xef\xbb\xbf': if _debug: sys.stderr.write('stripping BOM\n') if encoding != 'utf-8': sys.stderr.write('trying utf-8 instead\n') encoding = 'utf-8' data = data[3:] - elif data[:4] == '\x00\x00\xfe\xff': + elif data[:4] == b'\x00\x00\xfe\xff': if _debug: sys.stderr.write('stripping BOM\n') if encoding != 'utf-32be': sys.stderr.write('trying utf-32be instead\n') encoding = 'utf-32be' data = data[4:] - elif data[:4] == '\xff\xfe\x00\x00': + elif data[:4] == b'\xff\xfe\x00\x00': if _debug: sys.stderr.write('stripping BOM\n') if encoding != 'utf-32le': @@ -3317,7 +3312,7 @@ result['entries'] = [] if _XML_AVAILABLE: result['bozo'] = 0 - if isinstance(handlers, types.InstanceType): + if not hasattr(handlers, '__getitem__'): handlers = [handlers] try: f = _open_resource(url_file_stream_or_string, etag, modified, agent, referrer, handlers) @@ -3475,7 +3470,7 @@ saxparser.setContentHandler(feedparser) saxparser.setErrorHandler(feedparser) source = xml.sax.xmlreader.InputSource() - source.setByteStream(_StringIO(data)) + source.setByteStream(_StringIO(str(data,'utf-8'))) if hasattr(saxparser, '_ns_stack'): # work around bug in built-in SAX parser (doesn't recognize xml: namespace) # PyXML doesn't have this problem, and it doesn't have _ns_stack either