Toggle navigation

email.header

Source code for email.header

# Copyright (C) 2002-2006 Python Software Foundation
# Author: Ben Gertzfield, Barry Warsaw
# Contact: email-sig@python.org

"""Header encoding and decoding functionality."""

__all__ = [
    'Header',
    'decode_header',
    'make_header',
    ]

import re
import binascii

import email.quoprimime
import email.base64mime

from email.errors import HeaderParseError
from email.charset import Charset

NL = '\n'
SPACE = ' '
USPACE = u' '
SPACE8 = ' ' * 8
UEMPTYSTRING = u''

MAXLINELEN = 76

USASCII = Charset('us-ascii')
UTF8 = Charset('utf-8')

# Match encoded-word strings in the form =?charset?q?Hello_World?=
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  (?=[ \t]|$)           # whitespace or the end of the string
  ''', re.VERBOSE | re.IGNORECASE | re.MULTILINE)

# Field name regexp, including trailing colon, but not separating whitespace,
# according to RFC 2822.  Character range is from tilde to exclamation mark.
# For use with .match()
fcre = re.compile(r'[\041-\176]+:$')

# Find a header embedded in a putative header value.  Used to check for
# header injection attack.
_embeded_header = re.compile(r'\n[^ \t]+:')




# Helpers
_max_append = email.quoprimime._max_append




[docs]def decode_header(header): """Decode a message header value without converting charset. Returns a list of (decoded_string, charset) pairs containing each of the decoded parts of the header. Charset is None for non-encoded parts of the header, otherwise a lower-case string containing the name of the character set specified in the encoded string. An email.errors.HeaderParseError may be raised when certain decoding error occurs (e.g. a base64 decoding exception). """ # If no encoding, just return the header header = str(header) if not ecre.search(header): return [(header, None)] decoded = [] dec = '' for line in header.splitlines(): # This line might not have an encoding in it if not ecre.search(line): decoded.append((line, None)) continue parts = ecre.split(line) while parts: unenc = parts.pop(0).strip() if unenc: # Should we continue a long line? if decoded and decoded[-1][1] is None: decoded[-1] = (decoded[-1][0] + SPACE + unenc, None) else: decoded.append((unenc, None)) if parts: charset, encoding = [s.lower() for s in parts[0:2]] encoded = parts[2] dec = None if encoding == 'q': dec = email.quoprimime.header_decode(encoded) elif encoding == 'b': paderr = len(encoded) % 4 # Postel's law: add missing padding if paderr: encoded += '==='[:4 - paderr] try: dec = email.base64mime.decode(encoded) except binascii.Error: # Turn this into a higher level exception. BAW: Right # now we throw the lower level exception away but # when/if we get exception chaining, we'll preserve it. raise HeaderParseError if dec is None: dec = encoded if decoded and decoded[-1][1] == charset: decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1]) else: decoded.append((dec, charset)) del parts[0:3] return decoded
[docs]def make_header(decoded_seq, maxlinelen=None, header_name=None, continuation_ws=' '): """Create a Header from a sequence of pairs as returned by decode_header() decode_header() takes a header value string and returns a sequence of pairs of the format (decoded_string, charset) where charset is the string name of the character set. This function takes one of those sequence of pairs and returns a Header instance. Optional maxlinelen, header_name, and continuation_ws are as in the Header constructor. """ h = Header(maxlinelen=maxlinelen, header_name=header_name, continuation_ws=continuation_ws) for s, charset in decoded_seq: # None means us-ascii but we can simply pass it on to h.append() if charset is not None and not isinstance(charset, Charset): charset = Charset(charset) h.append(s, charset) return h
def _split_ascii(s, firstlen, restlen, continuation_ws, splitchars): lines = [] maxlen = firstlen for line in s.splitlines(): # Ignore any leading whitespace (i.e. continuation whitespace) already # on the line, since we'll be adding our own. line = line.lstrip() if len(line) < maxlen: lines.append(line) maxlen = restlen continue # Attempt to split the line at the highest-level syntactic break # possible. Note that we don't have a lot of smarts about field # syntax; we just try to break on semi-colons, then commas, then # whitespace. for ch in splitchars: if ch in line: break else: # There's nothing useful to split the line on, not even spaces, so # just append this line unchanged lines.append(line) maxlen = restlen continue # Now split the line on the character plus trailing whitespace cre = re.compile(r'%s\s*' % ch) if ch in ';,': eol = ch else: eol = '' joiner = eol + ' ' joinlen = len(joiner) wslen = len(continuation_ws.replace('\t', SPACE8)) this = [] linelen = 0 for part in cre.split(line): curlen = linelen + max(0, len(this)-1) * joinlen partlen = len(part) onfirstline = not lines # We don't want to split after the field name, if we're on the # first line and the field name is present in the header string. if ch == ' ' and onfirstline and \ len(this) == 1 and fcre.match(this[0]): this.append(part) linelen += partlen elif curlen + partlen > maxlen: if this: lines.append(joiner.join(this) + eol) # If this part is longer than maxlen and we aren't already # splitting on whitespace, try to recursively split this line # on whitespace. if partlen > maxlen and ch != ' ': subl = _split_ascii(part, maxlen, restlen, continuation_ws, ' ') lines.extend(subl[:-1]) this = [subl[-1]] else: this = [part] linelen = wslen + len(this[-1]) maxlen = restlen else: this.append(part) linelen += partlen # Put any left over parts on a line by themselves if this: lines.append(joiner.join(this)) return lines def _binsplit(splittable, charset, maxlinelen): i = 0 j = len(splittable) while i < j: # Invariants: # 1. splittable[:k] fits for all k <= i (note that we *assume*, # at the start, that splittable[:0] fits). # 2. splittable[:k] does not fit for any k > j (at the start, # this means we shouldn't look at any k > len(splittable)). # 3. We don't know about splittable[:k] for k in i+1..j. # 4. We want to set i to the largest k that fits, with i <= k <= j. # m = (i+j+1) >> 1 # ceiling((i+j)/2); i < m <= j chunk = charset.from_splittable(splittable[:m], True) chunklen = charset.encoded_header_len(chunk) if chunklen <= maxlinelen: # m is acceptable, so is a new lower bound. i = m else: # m is not acceptable, so final i must be < m. j = m - 1 # i == j. Invariant #1 implies that splittable[:i] fits, and # invariant #2 implies that splittable[:i+1] does not fit, so i # is what we're looking for. first = charset.from_splittable(splittable[:i], False) last = charset.from_splittable(splittable[i:], False) return first, last