fixes for comments
[iankelling.org] / _site / cgi / comment
1 #!/usr/bin/env ruby
2 # encoding: utf-8
3 # Copyright (C) 2019 Ian Kelling
4 # SPDX-License-Identifier: AGPL-3.0-or-later
5
6 # debian sets LANG=C when starting apache2.
7 # the encoding comment above fixes the internal encoding afaik,
8 # Found this at
9 # https://stackoverflow.com/questions/20521371/set-utf-8-as-default-for-ruby-1-9-3
10 # also note man ruby's -E arg.
11 Encoding.default_external = Encoding::UTF_8
12
13 require 'cgi'
14 require 'fileutils'
15 require 'time'
16 require 'sqlite3'
17 Dir.chdir(File.join(File.dirname(__FILE__), '..'))
18
19 require '../b'
20 include B
21
22
23 # constanty things
24 DEBUG = true
# Challenge questions for the anti-spam captcha, as [question, answer] pairs.
# The text below alternates question line / answer line. Answers are written
# in lower case and are interpolated into a regular expression by the answer
# check, so they may contain regex syntax such as "(the )?sun".
# The pairs are stored last-listed first.
CAPTCHA = <<~QA_LINES.each_line.map(&:chomp).each_slice(2).to_a.reverse
  What does a dog wag?
  tail
  Would you sleep better on a bed or a keyboard?
  bed
  Are there more or less than one million people on the Earth?
  more
  Which of Lilly and Robert is more commonly a woman’s name?
  lilly
  Which word has fewer letters, adorable or fox?
  fox
  What is the normal color of milk?
  white
  Which of Iceland and Turkey is an island nation?
  iceland
  Is a filesystem like a tree or a rose?
  tree
  What character is a tilde?
  ~
  Which is brighter, the moon or the sun?
  (the )?sun
  Which is closer, the moon or the sun?
  (the )?moon
  What language is this sentence written in?
  english
  What animal says "meow" and catches mice?
  cat
  What animal quacks and has webbed feet?
  duck
  Which are better fliers: worms or birds?
  birds
  Which typically runs first: a kernel or a web browser?
  kernel
QA_LINES
66
# Print the captcha page and end the request (never returns: calls exit 0).
#
# A random question from CAPTCHA is shown together with a form that posts
# back to /cgi/comment, carrying the question, the return page (GOTO) and the
# visitor's comment text (COMMENT_TXT) so nothing has to be retyped.
#
# Everything that originates from the request, plus the question itself, is
# HTML-escaped before being placed in the page. Without that, a comment such
# as "</textarea><script>..." would be reflected as markup, and a question
# containing double quotes would cut its own value="..." attribute short and
# could never be answered correctly.
def do_captcha
  captcha_q = CGI.escapeHTML(CAPTCHA.sample[0])
  goto = CGI.escapeHTML(GOTO)
  comment = CGI.escapeHTML(COMMENT_TXT)
  # The "url" input must stay empty: the argument parsing rejects any
  # request in which it is filled in.
  page = <<~HTML
    <p>Hello friend. I haven't read a post from #{IP}, and I only remember for a few months, so:</p>
    <p>#{captcha_q}</p>

    <form action="/cgi/comment" method="post">
    <input class="misc-comment-input" type="text" name="url">
    <input name="goto" type="hidden" value="#{goto}">
    <input name="question" type="hidden" value="#{captcha_q}">
    <input name="answer">
    <br>Your comment:
    <textarea rows="10" name="comment" maxlength="1000">#{comment}</textarea>
    <input type="submit" value="Submit">
    </form>
  HTML
  puts "Content-type: text/html\n\n"
  puts skel("#{DN}/captcha", page, header: ' / <a href="/blog.html">blog</a> / captcha')
  exit 0
end
86
87
# Abort the current request and exit with status 0.
#
# When DEBUG is on and a message is given, the message is sent to the client
# as a plain-text response. Otherwise the client is redirected via redir.
# Note that this replaces Kernel#fail for the whole script.
def fail(msg)
  show_reason = DEBUG && msg
  redir unless show_reason
  $stdout.puts("Content-type: text/plain\n\n", msg) if show_reason
  exit(0)
end
97
# Send a 302 redirect to the comment section of the GOTO page and exit.
#
# GOTO is taken from the request and can reach this method (through fail)
# before it has been validated, so CR and LF are removed from it first.
# Otherwise a crafted value could terminate the Location header and inject
# additional response headers.
def redir
  target = GOTO.delete("\r\n")
  puts 'Status: 302 Found'
  puts "Location: #{target}#comment-section\n\n"
  exit(0)
end
103
104
# Shorthand for File.basename; all arguments are passed through unchanged
# (a path and, optionally, a suffix to strip).
def bn(*args)
  File.basename(*args)
end
108
109
110 ###### begin error checking & arg parsing ######
cgi = CGI.new
IP = cgi.remote_addr

# Page the visitor is sent back to. Without a "goto" parameter there is
# nothing to comment on, so fall back to the site root and bail out.
goto_given = cgi.has_key?('goto')
GOTO = goto_given ? cgi['goto'] : '/'
fail('redir to /') unless goto_given

# A request must carry a comment and must leave the "url" field empty
# (presumably a honeypot for form-filling bots -- confirm).
url_filled = cgi.has_key?('url') && cgi['url'] != ""
if url_filled || !cgi.has_key?('comment')
  fail("comment not in form or url in form. cgi.params: #{cgi.params}")
end

COMMENT_TXT = cgi["comment"]

# Size limits: at most 1000 characters of comment and 150 of goto.
fail('length of comment or goto is too great') if COMMENT_TXT.length > 1000 || GOTO.length > 150

# Reject comments of two characters or fewer, or consisting only of whitespace.
fail('not enough content in comment') if COMMENT_TXT.length <= 2 || COMMENT_TXT =~ /\A\s*\Z/
135
136
# Check a submitted captcha answer. captchad ends up true only when the
# request carried both a question and a correct answer for it; a wrong
# answer shows a fresh captcha page (do_captcha does not return).
captchad = false
if cgi.has_key?('answer') && cgi.has_key?('question')
  # nil when the submitted question is not one of ours. That case must be
  # rejected explicitly: interpolating nil would yield an empty pattern that
  # an empty answer satisfies, bypassing the captcha.
  expected = CAPTCHA.to_h[cgi['question']]
  answer = cgi['answer'].strip.downcase
  # \A and \z anchor the whole string; ^ and $ only anchor a single line, so
  # a multi-line answer with one correct line would have been accepted.
  unless expected && answer =~ /\A#{expected}\z/
    do_captcha
  end
  captchad = true
end
144
145
# GOTO must name an existing entry of the blog directory (relative to the
# current working directory), written as "/blog/<entry>".
goto_known = Dir.foreach('blog').any? do |entry|
  entry != '.' && entry != '..' && GOTO == '/blog/' + entry
end
fail('goto entry not found') unless goto_known
157 ######### end error checking & arg parsing ########
158
159
$db = db_init
state = nil
WHITELIST_CUTOFF = NOW - 4*DAY


####### begin: state for ips we've seen before #######
# Each pair is [maximum number of posts, window length in seconds]. An IP
# that has more rows in table c than the maximum within any window is
# marked rate_limited.
rate_limits = [[5, 60],            # 1 min
               [10, 60*5],         # 5 min
               [20, 60*60],        # 60 min
               [30, 60*60*24],     # 1 day
               [60, 60*60*24*7]]   # 1 week

rate_limits.each do |max_posts, window|
  # Both values are bound as parameters instead of being spliced into the
  # SQL text.
  recent_posts = $db.execute(<<-SQL, [NOW - window, IP])[0][0]
    select count(*) from c
    where date > ? and ip = ?
  SQL
  if recent_posts > max_posts
    state = 'rate_limited'
    break # one exceeded window is enough; no need to query the rest
  end
end
180
# An IP with any earlier banned or rate_limited comment is treated as
# suspect, unless a state was already chosen above. The IP is bound as a
# parameter rather than interpolated into the SQL text.
bad_history = $db.execute(<<-SQL, [IP])[0][0]
  select count(*) from c
  where ip = ? and (
  state = 'banned' or
  state = 'rate_limited')
SQL
state ||= 'suspect' if bad_history > 0
187
# With no state chosen yet, derive one from this IP's earlier comments:
# compare the date of its latest "moderated-like" comment with the date of
# its latest "good-like" comment.
unless state
  older_date = NOW - DAY*2

  # The IP is bound as a parameter, and rows are explicitly ordered by date
  # so that taking the last row reliably yields the most recent one (row
  # order without ORDER BY is not defined).
  # NOTE(review): this query also matches timed/known comments older than
  # older_date, exactly like the last_good query below. An IP whose history
  # consists only of such comments therefore gets equal dates and ends up
  # 'waiting', not 'known' -- confirm this is intended.
  last_moderated = $db.execute(<<-SQL, [IP, older_date]).last
    select date from c
    where ip = ? and (
    state = 'moderated' or
    (date < ? and (state = 'timed' or state = 'known')))
    order by date
  SQL
  last_moderated = last_moderated[0] if last_moderated

  last_good = $db.execute(<<-SQL, [IP, older_date]).last
    select date from c
    where ip = ? and (
    state = 'picked' or
    (date < ? and (state = 'timed' or state = 'known')))
    order by date
  SQL
  last_good = last_good[0] if last_good

  if last_good && (last_moderated.nil? || last_good > last_moderated)
    # most recent relevant comment was a good one
    state = 'known'
  elsif last_moderated
    # not actually needed, since waiting is the default, but meh.
    state = 'waiting'
  end
end
####### end: state for ips we've seen before #######
218 ####### end: state for ips we've seen before #######
219
220 ####### begin: whitelist checking #########
# Locate the markdown source of the post being commented on: a file in
# ../blog named after GOTO's basename, preceded by as many arbitrary
# characters as there are in "YYYY-MM-DD-".
date_wildcard = '?' * 'YYYY-MM-DD-'.length
slug = bn(GOTO, '.*')
md_file = Dir["../blog/#{date_wildcard}#{slug}.md"][0]
# NOTE(review): md_file is nil when no file matches the pattern; the code
# below and the later post(md_file) call do not guard against that.

# With no state chosen yet, a post whose date (parsed from the start of the
# file name) is newer than WHITELIST_CUTOFF gets its comments auto-approved.
if !state
  stem = bn(md_file, '.*')
  post_date = Time.parse(stem[0..DATE_LEN]).to_i
  state = 'timed' if post_date > WHITELIST_CUTOFF
end
230 ###### end: whitelist checking ########
231
# Anything not classified so far waits for moderation.
state ||= 'waiting'

# Only commenters in the 'known' state may skip the captcha; everyone else
# must have answered one on this request.
do_captcha unless state == 'known' || captchad
237
238
239 # states:
240 # timed
241 # # was posted during a whitelist period, so automatically posted.
242 # # whitelist periods are per page times when legit comments are
243 # # much more likely than spam, so we automatically let comments through.
244
245 # known
246 # # ip posted good comment before: either, one in picked state, or
247 # # a timed/known comment which is over 2 days old (I saw it and didn't remove
248 # # it)
249
250 # picked
251 # # manually marked as a good comment, so publish it.
252
253 # rate_limited
254 # # posting too much, consider them a spammer.
255
256 # moderated
257 # # bad comment, but don't ban them
258
259 # banned
260 # # all comments from this ip dead, new comments don't even go into the db.
261
262 # waiting
263 # # waiting for manual moderation, gets posted automatically if there
264 # # is none in 24 hours
265
266 # suspect
267 # # had a bad post in the past. does not
268 # # automatically get posted in time without moderation.
269
270
271 # any of the manual states
# Date of the most recent comment in any of the manually assigned states
# (nil when there is none). Read before the new comment is inserted.
last_manual_date = $db.execute(<<-SQL)[0][0]
  select max(date) from c where
  state = 'moderated' or
  state = 'banned' or
  state = 'picked'
SQL

# Store the new comment.
$db.execute('insert into c values (NULL, ?, ?, ?, ?, ?)',
            [state, IP, NOW, GOTO, COMMENT_TXT])

# Count the comments that are not in one of the bad automatic states,
# restricted to those newer than the last manual action when there was one.
countable = <<-SQL
  select count(*) from c where
  state != 'rate_limited' and
  state != 'suspect'
SQL
binds = []
if last_manual_date
  countable += 'and date > ?'
  binds << last_manual_date
end
new_count = $db.execute(countable, binds)[0][0]
299
# Send a notification mail through the local SMTP server when the count
# computed above is exactly one.
if new_count == 1
  require 'net/smtp'
  sender = 'www-data@' + FQDN
  recipient = 'root@localhost'
  mail = <<~END_OF_MESSAGE
    From: h <#{sender}>
    To: <#{recipient}>
    Subject: new comment on #{FQDN}

    empty body
  END_OF_MESSAGE
  Net::SMTP.start('localhost') { |smtp| smtp.send_message(mail, sender, recipient) }
end

# Hand the post's markdown file to post, then send the visitor back to the
# comment section of the page.
post(md_file)

redir