#!/usr/bin/python
"""The primary server for SpamBayes.
Currently serves the web interface, and any configured POP3 and SMTP
proxies.
The POP3 proxy works with classifier.py, and adds a simple
X-Spambayes-Classification header (ham/spam/unsure) to each incoming
email. You point the proxy at your POP3 server, and configure your
email client to collect mail from the proxy then filter on the added
header. Usage:
sb_server.py [options] [<server> [<server port>]]
<server> is the name of your real POP3 server
<port> is the port number of your real POP3 server, which
defaults to 110.
options:
-h : Displays this help message.
-d FILE : use the named DBM database file
-p FILE : use the named Pickle database file
-l port : proxy listens on this port number (default 110)
-u port : User interface listens on this port number
(default 8880; Browse http://localhost:8880/)
-b : Launch a web browser showing the user interface.
-o section:option:value :
set [section, option] in the options database
to value
All command line arguments and switches take their default
values from the [pop3proxy] and [html_ui] sections of
bayescustomize.ini.
For safety, and to help debugging, the whole POP3 conversation is
written out to _pop3proxy.log for each run, if
options["globals", "verbose"] is True.
To make rebuilding the database easier, uploaded messages are appended
to _pop3proxyham.mbox and _pop3proxyspam.mbox.
"""
# This module is part of the spambayes project, which is Copyright 2002
# The Python Software Foundation and is covered by the Python Software
# Foundation license.
# Module metadata: the original author and acknowledgements.
# NOTE(review): the e-mail address appears as a redaction placeholder in
# this copy of the file - restore the real address from upstream.
# The literal must sit on one line: a single-quoted string cannot span
# lines, so the previous two-line form was a syntax error.
__author__ = "Richie Hindle <[email protected]>"
__credits__ = "Tim Peters, Neale Pickett, Tim Stone, all the Spambayes folk."
# Probe for the True/False names.  Merely referencing them raises
# NameError on an interpreter that does not define them, in which case
# integer stand-ins are bound at module level instead.
try:
    True, False
except NameError:
    # Maintain compatibility with Python 2.2
    True, False = 1, 0
# Developer to-do notes, kept as a module-level string rather than a
# comment.  Nothing in the visible part of this file reads it.
todo = """
Web training interface:
User interface improvements:
o Once the pieces are on separate pages, make the paste box bigger.
o Deployment: Windows executable? atlaxwin and ctypes? Or just
webbrowser?
o Save the stats (num classified, etc.) between sessions.
o "Reload database" button.
New features:
o Online manual.
o Links to project homepage, mailing list, etc.
o List of words with stats (it would have to be paged!) a la SpamSieve.
Code quality:
o Cope with the email client timing out and closing the connection.
Info:
o Slightly-wordy index page; intro paragraph for each page.
o In both stats and training results, report nham and nspam - warn if
they're very different (for some value of 'very').
o "Links" section (on homepage?) to project homepage, mailing list,
etc.
Gimmicks:
o Classify a web page given a URL.
o Graphs. Of something. Who cares what?
o NNTP proxy.
o Zoe...!
"""
import os, sys, re, errno, getopt, time, traceback, socket, cStringIO
from thread import start_new_thread
from email.Header import Header
import spambayes.message
from spambayes import Dibbler
from spambayes import storage
from spambayes.FileCorpus import FileCorpus, ExpiryFileCorpus
from spambayes.FileCorpus import FileMessageFactory, GzipFileMessageFactory
from spambayes.Options import options, get_pathname_option
from spambayes.UserInterface import UserInterfaceServer
from spambayes.ProxyUI import ProxyUserInterface
from spambayes.Version import get_version_string
# Increase the stack size on MacOS X.  Stolen from Lib/test/regrtest.py
if sys.platform == 'darwin':
    try:
        import resource
    except ImportError:
        # No resource module available: leave the limits untouched.
        pass
    else:
        soft, hard = resource.getrlimit(resource.RLIMIT_STACK)
        # Ask for at least 1024*2048 bytes of stack...
        newsoft = max(soft, 1024*2048)
        # ...but never more than the hard limit allows.
        if hard < newsoft:
            newsoft = hard
        resource.setrlimit(resource.RLIMIT_STACK, (newsoft, hard))
# exception may be raised if we are already running and check such things.
class AlreadyRunningException(Exception):
    """Signals that the server was found to be running already.

    Carries no extra state of its own; it exists so that callers can
    tell this condition apart from other exceptions.
    """
# Amount added to the length reported by STAT for each message, as a
# fudge to allow for the headers that SpamBayes adds.
# NOTE(review): presumably a byte count, since it pads a POP3 size -
# confirm where it is used.
HEADER_SIZE_FUDGE_FACTOR = 512
class ServerLineReader(Dibbler.BrighterAsyncChat):
"""An async socket that reads lines from a remote server and
simply calls a callback with the data. The BayesProxy object
can't connect to the real POP3 server and talk to it
synchronously, because that would block the process."""
lineCallback = None
def __init__(self, serverName, serverPort, lineCallback):
Dibbler.BrighterAsyncChat.__init__(self)
self.lineCallback = lineCallback
self.request = ''
self.set_terminator('\r\n')
self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
# create_socket creates a non-blocking socket. This is not great,
# because then socket.connect() will return errno 10035, because
# connect takes time. We then don't know if the connect call
# succeeded or not. With Python 2.4, this means that we will move
# into asyncore.loop(), and if the connect does fail, have a
# loop something like 'while True: log(error)', which fills up
# stdout very fast.
self.socket.setblocking(1)
try:
self.connect((serverName, serverPort))
except socket.error, e:
error = "Can't connect to %s:%d: %s" % (serverName, serverPort, e)
# Some people have their system setup to check mail very
# frequently, but without being clever enough to check whether
# the network is available. If we continually print the
# "can't connect" error, we use up lots of CPU and disk space.
# To avoid this, if not verbose only print each distinct error
# once per hour.
# See also: [ 1113863 ] sb_tray eats all cpu time
now = time.time()
then = time.time() - 3600
if error not in state.reported_errors or \
options["globals", "verbose"] or \
state.reported_errors[error] < then:
print >>sys.stderr, error
# Record this error in the list of ones we have seen this
# session.
state.reported_errors[error] = now
self.lineCallback('-ERR %s\r\n' % error)
self.lineCallback('') # "The socket's been closed."
self.close()
else:
self.socket.setblocking(0)
def collect_incoming_data(self, data):
self.request = self.request + data
def found_terminator(self):
self.lineCallback(self.request + '\r\n')
self.request = ''
def handle_close(self):
self.lineCallback('')
self.close()
class POP3ProxyBase(Dibbler.BrighterAsyncChat):
"""An async dispatcher that understands POP3 and proxies to a POP3
server, calling `self.onTransaction(request, response)` for each
transaction. Responses are not un-byte-stuffed before reaching
self.onTransaction() (they probably should be for a totally generic
POP3ProxyBase class, but BayesProxy doesn't need it and it would
mean re-stuffing them afterwards). self.onTransaction() should
return the response to pass back to the email client - the response
can be the verbatim response or a processed version of it. The
special command 'KILL' kills it (passing a 'QUIT' command to the
server).
"""
def __init__(self, clientSocket, serverName, serverPort):
Dibbler.BrighterAsyncChat.__init__(self, clientSocket)
self.request = ''
self.response = ''
self.set_terminator('\r\n')
self.command = '' # The POP3 command being processed...
self.args = [] # ...and its arguments
self.isClosing = False # Has the server closed the socket?
self.seenAllHeaders = False # For the current RETR or TOP
self.s
- 1
- 2
- 3
- 4
- 5
前往页