#!ruby -w
# cbstream.rb -- perlmonks chatterbox stream script
# http://russell2.math.bme.hu/~ambrus/sc/cbstream/
# Copyright (C) Zsbán Ambrus 2006
#
# NOTES:
#  channel #cbstream has chanserv mlock +n; #cbriver has mlock +n.
#  freenode is listening on ports 6665-6667,7000,7070,8000-8002
#  for freenode docs: 
#    ports on the server page, faq on registration mention email, usage chanmode +m-n, usage +r logged in unidentified
#  there are such commands as nickserv and chanserv
#
# TODO:
#  + BUG: binmode stdin in decode-log
#  + BUG: log correct server _before reconnecting_ in case of error 
#  + for [tye]'s request, make cbstream log pm webserver lags ?
#  + back to flock
#  + change http open wait times
#  - whenever you encounter an error, code to handle it gracefully
#  + doc website
#  + panic button
#  + demonize
#  + handle perlmonks errors, auto change server
#  + nice
#  + channel mode ?
#  + chanserv voice self
#  - nickserv ghost?
#  + pong
#  + encode messages
#  + limit throughput
#  + handle irc errors
#  + memory rlimit
#  - automatically detect if it's running at home or in ?
#  - allow nick change for people on the channel ?
#  - limit throughput by bytes, but implement in a better way
#  - read pm inbox
#  - allow sending pm private messages by irc private message
#  + detect errors sending pm message
#  - use CAPAB IDENTIFY-MSG, see http://freenode.net/Why_NOIDPREFIX.shtml ?
#  + refresh after cbupstream sends a message
#  - improve html rendering
#  + do something about lines prolonged too much by html formatting
#  + short display of links addressing monks ?
#  + collapse spaces from html
#  + add timestamps to log messages
#  - join other channels, like #buubot, and possibly hide this with the usermode ?
#  - refresh more often when the cb is not empty
#  + change login system to a +mz channel instead of memoserv
#  + revert showing code tags - people don't like just spaces
#  - major code cleanup
#  - at least refactor copy-paste code
#  - customizable wrapping long upstream lines ?
#  - support 1252 downstream if perlmonks doesn't
#  - support 1252 upstream
#  - computer-readable feed with alternate representations on msg ?
#  + limit login flooding
#  - make startup faster by waiting to particular replies: join reply, mode +o reply, and umode +E reply.
#  - send less NAMES replies: only send the next many collapsed to one after the last end of names reply or timeout ?
#  - if the new ircd comes, support namesx: http://www.inspircd.org/wiki/Modules/namesx ;
#      instead of namesx, it has "cap req multi-prefix" and then you just get multi-prefix in ordinary names replies 
#  - the above requires parsing MODE statements, in which headers help, see 
#      http://www.irc.org/tech_docs/005.html
#  - on ircd-seven, use umode +R instead of +E
#  - implement secure plogin, using either whois queries, or maybe identify-msg.
#      test to make sure it's indeed secure.
#  - more compressed logfile format, possibly recompress old logfile as well
#  + add switches to decode-log to decode only parts of the logfile
#  - make decode-log use nicer date format
#  + add third webserver
#


DATADIR = "/home/ambrus/local/cbstream/lib/cbstream";
AUTHDIR = "/home/ambrus/local/cbstream/lib/cbstream/auth";
VARLOCKDIR = "/home/ambrus/local/cbstream/var/lock";
WEBDIR = "/home/ambrus/public_html/sc/cbstream";
ALLOWFILENAME = VARLOCKDIR + "/cbstream.allow";
PIDLOCKFILENAME = VARLOCKDIR + "/cbstream.pidlock";
LAGLOGFILENAME = WEBDIR + "/cbstream.log";
IRCPASSFILENAME = AUTHDIR + "/ircpass";
ENTITIESFILENAME = DATADIR + "/entities.txt";


require "net/http"; 
require "rexml/document"; 
require "socket";
require "thread";
require "cgi";
require "iconv";


begin # local

	Dir.chdir "/";
	Process.setpriority Process::PRIO_PROCESS, 0, 5;
	Process.setrlimit Process::RLIMIT_AS, 32*1024*1024;
	Thread::abort_on_exception = 1;

	if "-d" == $*[0];
		puts "! daemonizing";
		$stdin.reopen "/dev/null", "r+";
		$stdout.reopen $stdin;
		$stderr.reopen "errout", "w";
		Process.fork and
			Process.exit!(0);
		Process.setsid;
	end;

	Thread.new do 62.times do sleep 24*60*60; end; fail "master timeout"; end;

	"1.8.7" == RUBY_VERSION or fail "running on wrong ruby version #{RUBY_VERSION} instead of 1.8.7";
	"russell2" == `hostname`.chomp or fail "running on the wrong host";
	@lockfile = File.new(PIDLOCKFILENAME, "r+");
	@lockfile.flock File::LOCK_EX | File::LOCK_NB or # note: this doesn't work over nfs
		fail "another instance of cbstream is already running";
	@lockfile.truncate 0;
	@lockfile.puts Process.pid;
	@lockfile.flush;
	at_exit do
		@lockfile.truncate 0;
	end;

	# The allowfile can be deleted if you want to kill cbupstream.  It's now checked on http.
	@http_headers = Hash["User-Agent", "cbstream ([ambrus], http://russell2.math.bme.hu/~ambrus/sc/cbstream/)"];
	if false;
		@allowfile_conn = Net::HTTP.new "russell2.math.bme.hu";
		@allowfile_conn.open_timeout = 25;
		@allowfile_conn.read_timeout = 25;
		@allowfile_lock = Mutex.new;
		def check_allow;
			@allowfile_lock.synchronize do
				resp = @allowfile_conn.head("/~ambrus/cbstream-allow/allow", @http_headers);
				Net::HTTPSuccess === resp or
					fail "external shutdown: allowfile removed";
			end;
		end;
	else
		@allowfile_name = ALLOWFILENAME;
		def check_allow;
			test ?f, @allowfile_name or
				fail "external shutdown: allowfile removed";
		end;
	end;
	check_allow;
	Thread.new do loop do sleep 300; 
		check_allow; 
		puts "@ " + Time.now.strftime("%F %T");
	end; end;
	
	class String;
		def uncontrol; tr("\x00-\x1f\x7f-\x9f", " "); end;
		def uncontrol_utf8; tr("\x00-\x1f", " "); end;
	end;
	@laglog = File.open LAGLOGFILENAME, "r+";
	@laglog.write [0xfd1d, 0xe00f, 0xf4b7, 75].pack("vvvv");
	@laglog.seek((15 + @laglog.stat.size() & ~15), IO::SEEK_SET);
	def laglogprint server, code, time = Time.now;
		if 0 != (code & (1 | 2048)); fail "error: laglog code overlaps with server number"; end;
		servercode = ((server & 1) | (if 2 <= server; 2048 else 0 end));
		@laglog.write [time.tv_sec, time.tv_usec / 1000, (code.to_i | servercode)].pack("Vvv");
		@laglog.flush;
	end;
	
end;


class TimedRateLimiter;
	# not completely reliable if called from multiple threads
	# rate is unit per second
	def initialize rate, max, credit;
		@rate, @max = rate, max;
		0 < rate or fail "invalid rate for TimedRateLimiter";
		0 < max or fail "invalid max for TimedRateLimiter";
		@time = Time.now;
		@tally = if credit < 0; 0 else credit end;
		@lock = Mutex.new;
	end;
	def update;
		time = Time.now;
		@tally += (time - @time) * @rate;
		if @max < @tally; @tally = @max; end;
		@time = time;
	end;
	def consume amt = 1;
		slp = ();
		@lock.synchronize do
			0 <= amt or 
				amt = 0;
			update;
			if @tally < amt;
				slp = Float(amt - @tally) / @rate;
			end;
		end;
		if slp 
			sleep slp;
		end;
		@lock.synchronize do
			@tally -= amt;
		end;
		true;
	end;
	def try_consume amt = 1;
		r = false;
		@lock.synchronize do
			0 <= amt or 
				amt = 0;
			update;
			if amt <= @tally;
				@tally -= amt;
				r = true;
			end;
		end;
		r;
	end;
end;


begin # irc->http interface

	@cbqueue = Queue.new;
	def cbputs pmuser, pmpass, msg, ircnick;
		while 150 < @cbqueue.length; @cbqueue.shift; end;
		@cbqueue.push [pmuser, pmpass, msg, ircnick];
	end;

end;


begin # irc

	#IRCTOPIC_NEWS = "please test the new login mechanism http://www.perlmonks.org/?node_id=702082";
	#IRCTOPIC_NEWS = "persistent login is temporarily disabled because of two old security bugs in cbstream found recently, you have to relogin every time you rejoin #cbstream, see http://www.perlmonks.org/?node_id=724058";
	IRCTOPIC_NEWS = nil;
	#IRCLOGINTOPIC_NEWS = "persistent login is temporarily disabled, will later be enabled with minor changes, see http://www.perlmonks.org/?node_id=724058";
	IRCLOGINTOPIC_NEWS = nil;

	IRCNICK = "cbstream";
	IRCCHAN = "#cbstream"
	IRCLOGINCHAN = "#cbstream-login"

	IRCTOPIC_BASE = "bidirectional perlmonks.org chatterbox bridge | FAQ: http://russell2.math.bme.hu/~ambrus/sc/cbstream/";
	IRCLOGINTOPIC_BASE = "login channel for cbstream | FAQ: http://russell2.math.bme.hu/~ambrus/sc/cbstream/";
	IRCTOPIC = IRCTOPIC_BASE + (if IRCTOPIC_NEWS; " | " + IRCTOPIC_NEWS else "" end);
	IRCLOGINTOPIC = IRCLOGINTOPIC_BASE + (if IRCLOGINTOPIC_NEWS; " | " + IRCLOGINTOPIC_NEWS else "" end);
	IRCEITHERCHAN = Hash[IRCCHAN.downcase, true, IRCLOGINCHAN.downcase, true];
	def irceitherchan chan; IRCEITHERCHAN[chan.downcase]; end;

	@ircputsqueue = Queue.new;
	def ircputs *msgs;
		msgs.each do |msg|
			m = msg.tr("\r\n", "  ");
			@ircputsqueue.push m;
		end;
	end;
	@ircauth = Hash[];
	@memoserv_state = :uninited;
	@memoserv_time = nil;
	@voiced = Hash[];
	@warned_cannottalk = Hash[];
	@warned_borged = Hash[];

	USAGEREPLY = [
		"Cbstream is a bidirectional bridge between the perlmonks chatterbox and irc.  To read the chatterbox messages, just join #{IRCCHAN} .",
		"Then, if you want to talk as well, identify with nickserv, join #{IRCLOGINCHAN} as well and there say `login your-perlmonks-username your-perlmonks-password', then talk on #{IRCCHAN} .",
		"For more information, read the faq at http://russell2.math.bme.hu/~ambrus/sc/cbstream/ .",
	];
	def logout user, strong;
		user =~ /\A([^!]+)/ or fail;
		u = $1.downcase;
		strong || @ircauth[u] && !@ircauth[u][2] and
			@ircauth.delete(u) and
			puts "! logout #{u}";
		@warned_cannottalk.delete(u);
		strong and
			ircputs "PRIVMSG #{u.uncontrol} :You are now logged out";
	end;
	@login_limiter = TimedRateLimiter.new 0.1, 100, 100;
	@usage_limiter = TimedRateLimiter.new 0.01, 100, 20;
	def commandgot mode, user, string;
		# mode is how we got the command: one of :priv, :memo, :loginchan
		#puts "! got command in #{mode} from nick #{user.ucontrol}"
		if string =~ /\A(?:\s*(?:send\s+)??(?:cbstream|cbriver))??\s*(p)?login\s+(?:\[([^\]]+)\]|(\S+))\s+(?:\[([^\]]+)\]|(\S+))(?!=\S)/i &&
			(:memo == mode || :loginchan == mode);
			if @login_limiter.try_consume 1;
				IRCNICK.downcase == user and fail "refusing to login special user #{user}";
				pmuser = $2 || CGI.unescape($3);
				pmpass = $4 || CGI.unescape($5);
				persistent = if $1; true else false end;
				persistent = false;
				user or fail "commandgot called with no user";
				puts "! #{if persistent; "p" else "" end;}login command in #{mode} nick #{user.uncontrol} pmuser #{pmuser.uncontrol}";
				@ircauth[user] = [pmuser, pmpass, persistent];
				ircputs "PRIVMSG #{user.uncontrol} :You are now #{if persistent; "persistently " else "" end;}logged in as perlmonks user #{pmuser.uncontrol}";
				if :memo == mode;
					ircputs "PRIVMSG #{user.uncontrol} :Instead of logging in with a memo, it is recommended that you use the new login method: join #{IRCLOGINCHAN} as well and there say `login your-perlmonks-username your-perlmonks-password'.";
				end;
			end;
		elsif string =~ /\A(?:\s*(?:send\s+)??(?:cbstream|cbriver))??\s*(?:forget|logout|login|plogout|plogin)\b/i;
			logout user, true;
			if @login_limiter.try_consume 1;
				puts "! explicit logout command in #{mode} nick #{user}";
			end;
		elsif string =~ /\A(?:\s*(?:send\s+)??(?:cbstream|cbriver))??\s*(?:help|halp|usage)\b/i;
			if @usage_limiter.try_consume 1;
				USAGEREPLY.each do |x|
					ircputs "PRIVMSG #{user.uncontrol} :" + x;
				end;
			end;
		end;
	end;

	def ircgot origin, command, args, full;
		case command;
			when "PING";
				ircputs "PONG :" + args.join(" ");
			when "477";
				# ignore -- [freenode-info] messages
			when "401"; # ERR_NOSUCHNICK
				if "ChanServ".downcase == args[1].downcase || "NickServ".downcase == args[1].downcase || "MemoServ".downcase == args[1].downcase;
					fail "error accessing services: #{full}";
				end;
				# ignore -- tried to send to a user who's since quit
			when "ERROR", /\A[4-5]/;
				fail "error from irc client: #{full}";
			when "KICK";
				p args;
				if irceitherchan(args[0]) && 
					IRCNICK.downcase == args[1].downcase;
					fail "irc kick me: #{full}";
				elsif IRCCHAN.downcase == args[0].downcase;
					@voiced.delete(args[0].downcase);
					logout args[1], false;
				end;
			when "PART", "QUIT";
				if IRCCHAN.downcase == args[0].downcase;
					@voiced.delete(origin.gsub(/\!.*/m) { "" }.downcase); 
					logout origin, false;
				end;
			when "JOIN";
				if IRCCHAN.downcase == args[0].downcase;
					logout origin, false;
				end;
			when "MODE";
				if IRCCHAN.downcase == args[0].downcase;
					ircputs "NAMES #{IRCCHAN}";
				end;
			when "NICK";
				logout origin, false;
				ircputs "NAMES #{IRCCHAN}";
			when "353"; # RPL_NAMREPLY
				if IRCCHAN.downcase == args[2].downcase;
					args[3].scan(/(\S+)/) do
						$1 =~ /\A([+@])?(\S+)\z/ or fail "error parsing names reply";
						if $1; 
							@voiced[$2.downcase] = true; 
						else 
							@voiced.delete($2.downcase); 
						end;
					end;
				end;
			when "PRIVMSG";
				origin or return;
				origin =~ /\A([^!]+)/ or fail;
				user = $1.downcase;
				if IRCNICK.downcase == args[0].downcase;
					commandgot :priv, user, args[1];
				elsif IRCLOGINCHAN.downcase == args[0].downcase;
					commandgot :loginchan, user, args[1];
					# CONT: enable using loginchan
				elsif IRCCHAN.downcase == args[0].downcase;
					if !(@voiced[user] || IRCNICK.downcase == user);
						if auth = @ircauth[user];
							msg = args[1];
							msg.sub!(/\A\x01ACTION\s(.*)\x01\z/m) { "/me " + $1 };
							msg.empty? and return;
							cbputs auth[0], auth[1], msg, user;
						else
							@warned_cannottalk[user] or
								ircputs "PRIVMSG #{user.uncontrol} :No-one can hear you talking on #cbstream unless you log in to #{IRCNICK}.  Try joining #{IRCLOGINCHAN} and then there say `login your-perlmonks-user your-perlmonks-password'.";
							@warned_cannottalk[user] = 1;
						end;
					end;
				end;
			when "NOTICE";
				if origin && origin =~ /\AMemoServ!/i;
					# this section of code seems somewhat fragile.  it could get into an infinite loop.
					#puts "! memoserv-state #{@memoserv_state} #{@memoserv_time && @memoserv_time < Time.now}";
					if :rest == @memoserv_state ||
						@memoserv_time && @memoserv_time < Time.now;
						case args[1];
							when /\A\x02Memo (\d+) - Sent by (\S+), /;
								@memoserv_state, @memoserv_time, @memoserv_user = :hrule, (Time.now + 120), $2.downcase;
							when /\AYou have a new memo from /;
								ircputs "PRIVMSG MemoServ :READ NEW", 
									"PRIVMSG MemoServ :DELETE 1";
						end;
					elsif :hrule == @memoserv_state;
						if args[1] =~ /\A----------/;
							@memoserv_state, @memoserv_time = :read, (Time.now + 120);
						end;
					elsif :read == @memoserv_state;
						#puts "! got memo from user #{@memoserv_user.uncontrol}"
						commandgot :memo, @memoserv_user, args[1];
						@memoserv_state, @memoserv_time, @memoserv_user = :rest, nil, nil;
					else
						# no-op
					end;
				end;
		end;
	end;

	sleep 1;
	check_allow;
	@ircconn = Socket.new(Socket::PF_INET, Socket::SOCK_STREAM, 0);
	begin
		ircport, irchost = "6667", "irc.freenode.net";
		ircportn = Socket.getservbyname(ircport);
		hostdat = Socket.gethostbyname(irchost);
		Socket::AF_INET == hostdat[2] or fail "error getting address of irc server #{irchost}: wrong protocol";
		irchostn = (irchostns = hostdat[3..-1])[rand(irchostns.size)]; # note: irchostn = hostdat.drop(3).sample; in ruby-1.9
		ircsockaddr = [Socket::AF_INET, ircportn, irchostn].pack("sna4x8");
		ircports, irchosts = Socket.unpack_sockaddr_in(ircsockaddr);
		puts "! about to connect to tcp #{irchost}/#{ircport} (#{irchosts}/#{ircports})";
		@ircconn.connect ircsockaddr;
		puts "! connection succeeded";
	end;
	Thread.new do 
		while l = @ircconn.gets; 
			l.gsub!(/[\r\n]*\z/) { "" };
			puts "< " + l.\
				sub(/(\bcb(?:up)?stream\b.*(?:login|\bp?lo.?[gi]|[og].?in\b)).*/im) { $1 + " ???" }.\
				gsub(/[\x00-\x1f~\x7f-\x9f]/) { "~%02x" % $&[0] };
			l =~ /\S/ or next;
			l =~ /\A(?:\:([^ ]*)\ +)?(.*?)(?:\ \:(.*))?\z/ or 
				fail "error parsing message from irc server";
			origin, mostargs, trailingarg = $1, $2.upcase, $3, $4;
			command, *args = mostargs.scan(/[^ ]+/);
			command =~ /\A\w+\z/ or 
				fail "error parsing2 message from irc server";
			trailingarg and args << trailingarg;
			ircgot(origin, command, args, l);
		end; 
		fail "irc server disconnected"; 
	end;
	IRC_PUTS_BYTE_RATE = 25;
	IRC_PUTS_BYTE_LINEOVERHEAD = 8;
	IRC_PUTS_BYTE_MAXCREDIT = 2000;
	IRC_PUTS_LINE_RATE = 0.25;
	IRC_PUTS_LINE_MAXCREDIT = 8;
	@ircputs_bytelimiter = TimedRateLimiter.new IRC_PUTS_BYTE_RATE, IRC_PUTS_BYTE_MAXCREDIT, IRC_PUTS_BYTE_MAXCREDIT / 2;
	@ircputs_linelimiter = TimedRateLimiter.new IRC_PUTS_LINE_RATE, IRC_PUTS_LINE_MAXCREDIT, IRC_PUTS_LINE_MAXCREDIT / 2;
	Thread.new do
		while msg = @ircputsqueue.shift;
			@ircputs_bytelimiter.consume(IRC_PUTS_BYTE_LINEOVERHEAD + msg.length);
			@ircputs_linelimiter.consume(1);
			puts "> " + msg.\
				sub(/(^pass\b|\bnickserv\b.*\bidentify\b).*/im) { $1 + " ???" }.\
				uncontrol;
			@ircconn.print msg, "\r\n";
		end;
	end;
	ircpass = File.open(IRCPASSFILENAME).readline.chomp;
	ircputs "PASS #{ircpass}", "NICK #{IRCNICK}", "USER cbstream 0 0 :http://www.math.bme.hu/~ambrus/cbstream/";
	check_allow;
	ircputs "JOIN #{IRCCHAN}", "JOIN #{IRCLOGINCHAN}";
	#ircputs "MODE #{IRCNICK} :+E"; # user mode +E forbids unidentified users to send private messages to you
	sleep 5; 
	ircputs "PRIVMSG ChanServ :OP #{IRCCHAN}", "PRIVMSG ChanServ :OP #{IRCLOGINCHAN}", 
		"PRIVMSG MemoServ :DEL ALL";
	sleep 5; # should wait to mode message confirming chanserv op instead
	ircputs \
		"MODE #{IRCCHAN} +mn", "MODE #{IRCCHAN} +tz", "MODE #{IRCCHAN} -s", 
		"MODE #{IRCLOGINCHAN} +mn", "MODE #{IRCLOGINCHAN} +tz", "MODE #{IRCLOGINCHAN} -s", 
		"TOPIC #{IRCCHAN} :" + IRCTOPIC,
		"TOPIC #{IRCLOGINCHAN} :" + IRCLOGINTOPIC;
	@memoserv_state = :rest;

end;

begin # perlmonks common

	@http_servers = ["209.197.123.153", "66.39.54.27", "216.92.34.251"];
	@http_refresh_lock = Mutex.new;
	@http_refresh_cond = ConditionVariable.new;
	@http_refresh_curious = ();
	class PageLoadError < StandardError; end;

end;


Thread.new do # up to perlmonks

	def latin1_from_utf8_html s; # convert from utf8 to latin1, ampersand-escaping bad characters
		String === s or fail "function latin1_from_utf8_html wants a string argument";
		t = s.gsub(/([\xc4-\xff][\x80-\xbf]{1,5})/) { "&\#x%x;" % $1.unpack("U") };
		begin
			u = Iconv.conv("iso-8859-1", "utf-8", t);
		rescue Iconv::IllegalSequence;
			u = s;
		end;
		u;
	end;

	@up_http_headers = @http_headers.dup;
	@up_http_headers["Content-Type"] = "application/x-www-form-urlencoded";
	@up_http_serverno = 0;
	@up_errorstep = 0;
	def up_http_connect
		@up_http_serverno = (@up_http_serverno + 1) % @http_servers.size;
		@up_http_curserver = @http_servers[@up_http_serverno];
		@up_http_conn = Net::HTTP.new @up_http_curserver; 
	 	@up_http_conn.open_timeout = [15, 30, 30, 30, 60, 60, 60, 60][@up_errorstep] || 4*60;
	 	@up_http_conn.read_timeout = 3*60;
		@up_extrasleep = 5;
		@up_serverchange_time = Time.now + 300*60 + 16*60*rand();
	end;
	up_http_connect;
	up_http_flood_limiter = TimedRateLimiter.new 1.0/2, 8, 2;
	params = ();
	puts "! entering up-main loop";

	loop do;

		sleep [0.5, 2, 30, 30, 60, 60, 120, 120][@up_errorstep] || 4*60;
		sleep @up_extrasleep;
		up_http_flood_limiter.consume 1;
		body = ""; 
		0 == @up_errorstep and
			params = @cbqueue.shift; # queue shift is blocking
		pmuser, pmpass, msg, nick = params;
		msg1 = latin1_from_utf8_html(msg);
		epmuser, epmpass, emsg = CGI.escape(pmuser), CGI.escape(pmpass), CGI.escape(msg1);
		check_allow;
		puts "! talk user #{epmuser} nick #{nick.uncontrol} message #{emsg} host #{@up_http_curserver}";
		begin
			stime = Time.now;
			resp = @up_http_conn.post(
				"/?", 
				"node_id=227820;op=login;user=#{epmuser};passwd=#{epmpass};op=message;message=#{emsg}", 
				@up_http_headers);
			Net::HTTPServerError === resp and
				fail PageLoadError;
			Net::HTTPSuccess === resp or 
				fail "cb up-blab http error: #{resp.message}";
			body = resp.body;
			@up_extrasleep = (Time.now - stime).abs;
			body.gsub!(/<\/?i>/i) { "" };
			body.sub!(/\A\s*/) { "" };
			case body;
				when /\AChatter Accepted/i;
					if msg1 =~ /\S/ && msg1 !~ /\A\/(?:ignore|unignore|chatteron|chatteroff)\w/;
						# probably sent message, refresh cbstream
						puts "! message sent, signalling downstream to refresh";
						@http_refresh_lock.synchronize do
							@http_refresh_curious = 1; # curious event
							@http_refresh_cond.broadcast;
						end;
					else
						(); # null message or special command
					end;
				when /\AYou can.*logged/i;
					puts "! wrong login #{nick.uncontrol}";
					ircputs "PRIVMSG #{nick.uncontrol} :Cannot talk: wrong perlmonks username or password. Try to login again.";
				when /\ASend failed/i;
					ircputs "PRIVMSG #{nick.uncontrol} :Send failed, see Message Inbox";
				when /\AYour words echo back hollowly from /;
					puts "! sendchatter reports user tried to talk when is borged";
					warned_borged = (@warned_borged[pmuser] ||= TimedRateLimiter.new 1.0/(60*10), 1, 1);
					if warned_borged.try_consume 1;
						ircputs "PRIVMSG #{nick.uncontrol} :Your words echo back hollowly from the walls of the [ http://www.perlmonks.org/?node_id=56087 |oubliette]";
					end;
				when /\AYou said/i;
					ircputs "PRIVMSG #{nick.uncontrol} :Message sent";
				when /\AWhoa, Cowboy/i;
					ircputs "PRIVMSG #{nick.uncontrol} :Whoa, Cowboy!  You said that already.  Don't 'reload'?";
				when /\ANo chatter/i;
					puts "! sendchatter reports no chatter, shouldn't happen, means wrong cgi parameters";
				else
					puts "! sendchatter gives unknown message: #{body[0, 256].uncontrol}";
			end;
		rescue \
				TimeoutError, 
				Errno::EHOSTUNREACH, Errno::ECONNRESET, Errno::ECONNREFUSED, Errno::ENETUNREACH, Errno::ETIMEDOUT,
				EOFError, 
				PageLoadError;
			puts "! error loading up-blab from #{@up_http_curserver}: #{$!.class}";
			(@up_errorstep += 1) < 15 or 
				fail "cannot load perlmonks up-blab ticker in #{@up_errorstep} tries";
			up_http_connect;
			redo;
		end;
		puts "! sent up-blab in #{@up_extrasleep}";
		@up_errorstep = 0;

	end;

end;


begin # down from perlmonks
	
	NAMEDENT = begin # named html entities
		r = Hash[];
		File.open(ENTITIESFILENAME).each do |l|
			l =~ /\A\s*(\w+)\s+(\w+)/ and
				r[$1] = $2.to_i(16);
		end;
		60 == r["lt"] && 62 == r["gt"] && 38 == r["amp"] && 34 == r["quot"] or 
			fail "html entity table bogus";
		r;
	end;

	def output_message m;
		msgid, author, authorid, rawmsg, htmmsg = 
			%w"message_id author author_user msgtext parsed".map do |elt|
				#Iconv.conv("iso-8859-1", "utf-8", m.get_elements(elt)[0].text);
				f = m.get_elements(elt)[0];
				if !f;
					puts "! required element #{elt} missing from message:";
					puts "!   " + m.to_s.uncontrol;
					fail "required element missing from message";
				end;
				f.text || "";
			end;
		@lastmsg = Integer(msgid); 
		fmt = format_message(false, author, authorid, htmmsg);
		if fmt.length < 384;
			ircputs "PRIVMSG #{IRCCHAN} :" + fmt.uncontrol_utf8;
		else fmt = format_message(true, author, authorid, htmmsg);
			if fmt.length <= 384;
				ircputs "PRIVMSG #{IRCCHAN} :" + fmt.uncontrol_utf8;
			elsif fmt =~ /\A(.{0,384})([^\x80-\xbf].{0,379})\z/m;
				ircputs "PRIVMSG #{IRCCHAN} :" + $1[0, 384].uncontrol_utf8,
					"PRIVMSG #{IRCCHAN} :... " + $2[0, 380].uncontrol_utf8;
			else
				puts "! falling back to raw message";
				fmt = format_message_raw(author, authorid, rawmsg);
				ircputs "PRIVMSG #{IRCCHAN} :" + fmt.uncontrol_utf8[0, 384];
			end
		end;
	end;

	def format_author author, authorid;
		r = if author =~ /\A([ -\\^-{}~]){1,24}\z/;
			author
		else
			authorid + "|" + author[0, 14].tr("]\n", "  ");
		end;
		"[" + r + "] ";
	end;

	def format_message shorter, author, authorid, htmmsg;
		o = format_author(author, authorid);
		codetag = 0;
		htmmsg =~ %r"\A/me <i>\s?(.*)</i>\z"m and
			htmmsg = "/me " + $1;
		toks = [];
		htmmsg.scan(%r"(?x) \G(?:
			(<[^>]+>) |
			([^<]+) 
		)") do
			if wtag = $1;
				if wtag =~ /\A<\s*(\/\s*)?(\w*)(.*)>\z/m;
					toks << [$2.downcase, wtag, $3, !$1];
				elsif wtag =~ /\A<\s*!\s*--/;
					toks << [:comment. wtag]
				else
					toks << [:badtag, wtag];
				end;
			elsif $2
				toks << [:text, $2];
			end;
		end;
		skip = 0;
		toks.each_with_index do |tok, ind|
			0 < skip and (skip -= 1; next);
			wtag, rest, open = tok[1], tok[2], tok[3];
			o << case tok[0]
				when "a";
					if open;
						attr = Hash[];
						rest.scan(/([\w\-._:]+)(?:=(?:\"([^\"]*)\"|\'([^\']*)\'|([^\s\"\']*)))?/) do
							attr[$1.downcase] = CGI.unescapeHTML($+);
						end;
						# ignore title,name,target attrs
						honest = false; textonly = false;
						if link = attr["href"];
							if toks[ind + 2] && "a" == toks[ind + 2][0] && !toks[ind +2][3] && :text == toks[ind + 1][0];
								text = toks[ind + 1][1];
								if link == CGI.unescapeHTML(text);
									skip = 1; honest = true;
								elsif (link =~ /\A\?node=([^;&]+)\z/ and CGI.unescape($1) == CGI.unescapeHTML(text));
									textonly = true;
								end;
							end;
							if link !~ /\A\w{2,16}\:/;
								shorter or
									link = "http://www.perlmonks.org/" + link;
								shorter and
									link.sub!(/\A\?node(?:_id)?=/) { "" };
							end;
						else
							link = "!";
						end;
						if textonly;
							"[";
						elsif shorter;
							"[" + link + (if honest; "" else "|" end);
						else
							"[ " + link + (if honest; " " else " |" end);
						end;
					else
						"]";
					end;
				when "b", "strong";
					"*";
				when "i", "em";
					# remove automatic i tags resulting from /me
					"/";
				when "tt"; # code tags are converted to tt
					codetag += if open; 1 else -1 end;
					if open; "<c>" else "</c>" end;
					#" ";
				when "sup";
					if open; "^{" else "}"; end;
				when "sub";
					if open; "_{" else "}"; end;
				when "u", "ins", "font";
					"+";
				when "s", "strike", "del";
					if open; "!-" else "-"; end;
				when "small";
					if open; "(" else ")"; end;
				when "q";
					"\"";
				when "span";
					"" # ignore
				when String; # any other tag
					wtag; 
				when :comment;
					wtag;
				when :badtag;
					wtag;
				when :text
					m = wtag.dup;
					m.gsub!(%r/(?x)
						\&
						(?: 
							([a-zA-Z][a-zA-Z0-9]{1,15}) | 
							\# (?: x([0-9a-fA-F]{1,16}) | ([0-9]{1,16}) ) 
						)
						(?:\;|(?=[^;\-.a-zA-Z0-9]))
					/) do
						c = (if $1;
							NAMEDENT[$1]
						elsif $2;
							r = $2.to_i(16);
						elsif $3;
							r = $3.to_i;
						end);
						if c && 0 <= c && c <= 0x7fffffff; [c].pack("U") else $& end;
					end;
					0 == codetag and # we _should_ do this even in code tags, but the html output of the ticker is buggy
						m.gsub!(/\s+/) { " " };
					m;
				else
					fail "internal error: tokenizer generated invalid type of token from cbstream ticker html";
			end;
		end;
		o;
	end;
	def format_message_raw author, authorid, rawmsg;
		o = format_author(author, authorid) + rawmsg;
	end;
	
	@http_serverno = 0;
	@errorstep = 0;
	def http_connect
		@http_serverno = (@http_serverno + 1) % @http_servers.size;
		@http_curserver = @http_servers[@http_serverno];
		@http_conn = Net::HTTP.new @http_curserver; 
	 	@http_conn.open_timeout = [15, 30, 30, 30, 60, 60, 60, 60][@errorstep] || 4*60;
	 	@http_conn.read_timeout = 3*60;
		@extrasleep = 5;
		@serverchange_time = Time.now + 300*60 + 16*60*rand();
	end;
	http_connect;
	@lastmsg = 1; 
	http_refresh_limiter = TimedRateLimiter.new 1.0/6, 3, 0;
	class EmptyBodyError < StandardError; end;
	laglogprint 0, 2; # startup
	puts "! entering main loop";

	loop do;

		sleep @extrasleep;
		sleep 1;
		if 0 == @errorstep;
			timeout_thiswait = ();
			@http_refresh_lock.synchronize do
				Thread.new do
					sleep 29;
					@http_refresh_lock.synchronize do
						timeout_thiswait = true;
						@http_refresh_cond.broadcast;
					end;
				end;
				while !timeout_thiswait && !@http_refresh_curious;
					@http_refresh_cond.wait(@http_refresh_lock);
				end;
			end;
		else
			sleep [30, 2, 30, 30, 60, 60, 120, 120][@errorstep] || 4*60;
		end;
		http_refresh_limiter.consume 1;
		check_allow;
		@http_refresh_lock.synchronize do # probably no need to lock
			@http_refresh_curious = false;
		end;
		
		puts "! trying to load the cb ticker from #{@http_curserver}";
		body = ""; doc = (); 
		begin
			@http_stime = Time.now;
			resp = @http_conn.get(
				"/?node_id=207304;xmlstyle=modern;fromid=#{@lastmsg}", 
				@http_headers);
			Net::HTTPServerError === resp and
				fail PageLoadError;
			Net::HTTPSuccess === resp or 
				fail "cb ticker http error: #{resp.message}";
			body = resp.body;
			@extrasleep = ((Time.now - @http_stime) * 1.1).abs;
			doc = REXML::Document.new(body); 
			doc.root or 
				fail EmptyBodyError;
		rescue \
				TimeoutError, 
				Errno::EHOSTUNREACH, Errno::ECONNRESET, Errno::ECONNREFUSED, Errno::ENETUNREACH, Errno::ETIMEDOUT,
				EOFError, 
				PageLoadError, REXML::ParseException, EmptyBodyError;
			puts "! error loading from #{@http_curserver}: #{$!.class}";
			REXML::ParseException === $! || EmptyBodyError === $! and
				puts "! response body was: #{body[0,256].uncontrol}";
			(@errorstep += 1) < 15 or 
				fail "cannot load perlmonks cb ticker in #{@errorstep} tries";
			lagcode = 
				case $!; 
					when TimeoutError; 18; 
					when Errno::EHOSTUNREACH; 20;
					when Errno::ECONNRESET; 26;
					when Errno::ECONNREFUSED; 28;
					when Errno::ENETUNREACH; 36;
					when Errno::ETIMEDOUT; 30;
					when EOFError; 22;
					when PageLoadError; 24;
					when REXML::ParseException; 32;
					when EmptyBodyError; 34;
					else 16;
				end;
			laglogprint @http_serverno, lagcode, @http_stime;
			http_connect;
			redo;
		end;
		puts "! loaded cb ticker in #{@extrasleep}";
		@errorstep = 0;
		timecode = begin (16 * Math.log(@extrasleep.to_f)).floor; rescue Errno::ERANGE, Errno::EDOM; -256 end;
		lagcode = 
			(1024 + 512) + 2 * (if timecode < -256; -256 elsif 255 < timecode; 255 else timecode end);
		laglogprint @http_serverno, lagcode, @http_stime;
		Time.now < @serverchange_time or
			http_connect;

		doc.root.each_element("message") do |m| 
			output_message m;
		end;

	end;

end;


__END__
