good working version
[iankelling.org] / b.rb
1 # encoding: utf-8
2 # Copyright (C) 2016 Ian Kelling
3
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU General Public License as published by
6 # the Free Software Foundation, either version 2 of the License, or
7 # (at your option) any later version.
8
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU General Public License for more details.
13
14 # You should have received a copy of the GNU General Public License
15 # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 module B # blog module
17 require 'fileutils'
18 require 'time'
19 require 'safe_yaml'
20 require 'pygments'
21 require 'sqlite3'
22 require 'redcarpet'
# Footer note about LibreJS; post appends it when a post's content contains
# a script tag.
JS_INFO = '<p>All JavaScript has <a href="https://www.gnu.org/software/librejs/index.html">LibreJS</a> support.</p>'

# Number of seconds in one day.
DAY = 24 * 60 * 60
# Short site name, fully qualified domain name, and base url of the site.
DN = 'iankelling'
FQDN = "#{DN}.org"
DURL = "https://#{FQDN}"
# Default text for the description meta tag.
DESCRIPTION = "Ian Kelling's personal site and blog on software"
# Length of the date prefix at the start of post file names.
DATE_LEN = 'YYYY-MM-DD'.size
# Time of this run, as float seconds since the epoch.
NOW = Time.now.to_f
# One day before NOW; post compares the date of 'waiting' comments
# against this.
WAIT_DATE = NOW - DAY
33
# Returns a new SQLite3::Database for the comments database. The path is
# relative to the current working directory.
def db_init
  comments_db_path = '../proposed-comments/comments.sqlite'
  SQLite3::Database.new(comments_db_path)
end
37
# Redcarpet HTML renderer that highlights code blocks with Pygments.
# Based on the redcarpet readme; the handling of the exception was
# figured out separately.
class HTMLwithPygments < Redcarpet::Render::HTML
  # Highlights code using the lexer named by language, and highlights it
  # as plain 'text' if that raises MentosError.
  def block_code(code, language)
    Pygments.highlight(code, lexer: language)
  rescue MentosError
    # when language detection fails
    Pygments.highlight(code, lexer: 'text')
  end
end
50
# Writes string to output_path, first creating any missing parent
# directories. Returns the result of File.write.
def fwrite(output_path, string)
  parent_dir = File.dirname(output_path)
  FileUtils.mkdir_p(parent_dir)
  File.write(output_path, string)
end
55
# Renders a full page with skel and writes it to rel_path.
#
# rel_path - output path; also appended to DURL for the canonical link
# title    - page title, passed through to skel
# content  - main content html, passed through to skel
# o        - options hash passed through to skel. Its :head entry is always
#            replaced by the links generated here. The caller's hash is
#            left unmodified.
#
# Returns whatever fwrite returns.
def fskel(rel_path, title, content, o={})
  head = <<EOF
<link rel="canonical" href="#{DURL}/#{rel_path}">
EOF
  # Paths under blog/ also get a link to the feed.
  if rel_path =~ %r{^blog/.}
    head += <<EOF
<link rel="alternate" type="application/atom+xml" title="#{DN}" href="#{DURL}/feed.xml">
EOF
  end
  # merge builds a new hash, so a frozen or reused options hash is safe.
  fwrite(rel_path, skel(title, content, o.merge(head: head)))
end
# Returns the complete html document for one page.
#
# title   - text for the <title> element
# content - html placed inside the main content div
# o       - optional html fragments, each inserted verbatim:
#           :description - description meta tag content (default DESCRIPTION)
#           :head        - extra markup inside <head>
#           :header      - markup following the site link in the page header
#           :comments    - markup inside the comment stripe
#           :footer      - markup at the start of the footer
def skel(title, content, o={})
  # got meta viewport from jekyll's default layout. It's for better
  # mobile viewing.
  # The contact link uses the mailto: scheme; without it the address is
  # treated as a relative url.
  <<EOF
<!DOCTYPE html>
<html lang="en-US">
<head>
<meta charset="utf-8">
<title>#{title}</title>
<link rel="stylesheet" href="/main.css">
<link rel="stylesheet" href="/common.css">
<link rel="shortcut icon" href="/assets/favicon.png" />
<meta name="description" content="#{o[:description] || DESCRIPTION}">
#{o[:head]}
<meta name="viewport" content="width=device-width, initial-scale=1">
</head>
<body>
<header>
<h3><a href="/">iankelling.org</a>#{o[:header]}</h3>
</header>
<div class="main-content-stripe">
<div class="content">
#{content}
</div>
</div>
<div id="comment-stripe">
#{o[:comments]}
</div>
<footer>
#{o[:footer]}
<p>This site has a <a href="/git/?p=iankelling.org;a=summary">git repo</a>. Code is gpl, content is <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/"><img id="cc-by-sa" alt="Creative Commons License" src="/assets/cc-by-sa-4.0-80x15.png" /></a> unless otherwise noted</p>
<p><address><a href="mailto:ian@iankelling.org">ian@iankelling.org</a> let me know what you think</address></p>
</footer>
</body>
</html>
EOF
end
106
# Writes a top-level page to "<page_name>.html" through fskel. The page
# title is "<DN>/<page_name>" and the header gets a link to the page itself.
def stdpage(page_name, content)
  output_file = "#{page_name}.html"
  page_title = "#{DN}/#{page_name}"
  breadcrumb = %( / <a href="/#{page_name}.html">#{page_name}</a>)
  fskel(output_file, page_title, content, header: breadcrumb)
end
113
# Converts the markdown string md to html, with fenced code blocks enabled
# and the HTMLwithPygments renderer.
def md_to_html(md)
  # Redcarpet is used over kramdown because syntax highlighting is
  # simpler. kramdown uses some crap highlighter by default and supports
  # rouge, but then the classes don't match what the pygments css expects.
  # rouge has a pygments compatibility mode, but that is a pita to get
  # working and then doesn't even work right. kramdown is jekyll's default
  # markdown parser, but jekyll doesn't use it for code blocks: it strips
  # them out with a custom templating extension class, runs rouge, then
  # wraps the result in custom html for pygments compatibility. It's a
  # complicated mess.
  markdown = Redcarpet::Markdown.new(HTMLwithPygments, fenced_code_blocks: true)
  markdown.render(md)
end
126
# Returns the html for one comment: the comment text rendered as markdown,
# followed by its date, wrapped in a div of class "comment".
#
# comment - markdown text of the comment
# date    - time of the comment, passed to Time.at
#
# NOTE(review): the stock Redcarpet::Render::HTML renderer is created
# without any html escaping or filtering option, so raw html in a comment
# presumably passes through to the page. Confirm comments are sanitized
# before they get here.
def comment_html(comment, date)
  # I tried adding the time of day, %I:%M %p UTC, but it looks kinda
  # clunky, going against my simple theme.
  stamp = Time.at(date).strftime("%b %-d '%y")
  markdown_src = "#{comment}\n<span class=\"comment-date\">#{stamp}</span>\n"
  renderer = Redcarpet::Markdown.new(Redcarpet::Render::HTML, fenced_code_blocks: true)
  inner = renderer.render(markdown_src)
  "<div class=\"comment\">\n#{inner}\n</div>\n"
end
140
# Builds one blog post: writes its html page through fskel and returns the
# pieces needed for the feed and the blog table of contents.
#
# file       - path of the markdown source. The base name has the form
#              YYYY-MM-DD-<name>.md and the file starts with yaml front
#              matter. Keys read: 'title', 'description', 'comment_links'.
# build_time - when true, also writes the published comments to files
#              under ../comments/<rel_path>/, removes comment files that
#              are not older than the oldest database row, and deletes
#              database rows older than 180 days.
#
# Reads comments through the global $db and sets the global $page_title.
#
# Returns [feed_entry, blog_toc_entry].
# Raises ArgumentError if the file has no yaml front matter.
def post(file, build_time=false)
  source = File.read(file)
  front_matter = source.match(%r{\A(---\s*\n.*?\n?)^((---)\s*$\n?)}m) # yaml front matter
  raise ArgumentError, "#{file}: no yaml front matter found" unless front_matter

  # everything after the front matter is the markdown body
  content = front_matter.post_match
  front = SafeYAML.load(front_matter[1])
  title = front['title']
  $page_title = "#{title} | #{DN}"
  header_rel = ' / <a href="/blog.html">blog</a> /'

  footer_extra = <<-EOF
<p><a class="icon-rss" href="/feed.xml">Subscribe</a></p>
  EOF
  footer_extra += JS_INFO if content =~ /<script/

  b = File.basename(file, '.md')
  # the base name starts with the date in the format: YYYY-MM-DD-
  date = Time.parse(b[0, DATE_LEN])
  rel_path = "blog/#{b[(DATE_LEN + 1)..-1]}.html"
  # rel_path comes from the file name, so it is bound as a parameter
  # instead of being pasted into the sql text.
  comments = $db.execute(<<-SQL, [rel_path, WAIT_DATE])
select comment, date from c
where page = ? and (
state = 'picked' or state = 'known'
or (state = 'waiting' and date < ?))
  SQL
  # get the earliest comment. earlier ones stored in git will also be
  # published. This gets us easily sharable comments, and allows us
  # to expire unpublished comments and ip addresses which are PII and
  # should never be kept around indefinitely.
  sql_start_date = $db.execute('select min(date) from c')[0][0] || NOW
  comment_file_dir = "../comments/#{rel_path}"
  old_comments = Dir["#{comment_file_dir}/*"].each_with_object([]) do |f, kept|
    # comment files are named after the comment's date
    dt = File.basename(f).to_f
    if dt < sql_start_date
      kept << [File.read(f), dt]
    elsif build_time
      FileUtils.rm(f)
    end
  end
  if build_time
    FileUtils.mkdir_p comment_file_dir
    comments.each do |c, c_date|
      # fyi: there is an extremely small chance of 2 comments having
      # the same floating point time and thus overwriting each other.
      # Small enough that it won't happen at my site's scale.
      File.write(File.join(comment_file_dir, c_date.to_s), c)
    end
    # https://piwik.org/docs/privacy/ says keep logs for 3-6 months
    $db.execute('delete from c where date < ?', [NOW - DAY * 180])
  end
  comments = old_comments + comments
  pending_comments = $db.execute(<<-SQL, [rel_path, WAIT_DATE])[0][0]
select count(*) from c
where page = ? and
(state = 'waiting' and date > ? or state = 'suspect')
  SQL

  feed_html = md_to_html(content)
  page_html = <<-EOF
<header class="post-header">
<h1 class="post-title">#{title}</h1>
<p class="post-date">#{date.strftime("%b %-d, %Y")}</p>
</header>
#{feed_html}
  EOF
  com_list = comments.map { |text, stamp| comment_html(text, stamp) }.join
  if pending_comments > 0
    if pending_comments >= 2
      text = "are #{pending_comments} new comments"
    else
      text = 'is 1 new comment'
    end
    com_list +=
      comment_html("Note: there #{text} pending approval.", NOW)
  end
  com_section = <<-EOF
<form class="comment" action="/comment.rb" method="post">
<input class="misc" type="text" name="url">
<input name="goto" type="hidden" value="#{rel_path}">
<textarea rows="10" name="comment" placeholder="markdown" maxlength="1000"></textarea>
<input type="submit" value="Leave a comment">
</form>
<div id="comments">
#{com_list}
</div>
  EOF
  links = front['comment_links']
  if links
    link_html = links.map { |name, url| "<a href=\"#{url}\">#{name}</a>" }
                     .join(', ')
    com_section += (<<EOF)
<p>More comments at #{link_html}</p>
EOF
  end

  blog_toc_entry = "<li><a href=\"#{rel_path}\">#{title}</a></li>"

  com_section = <<EOF
<div id="comment-section">
#{com_section}
</div>
EOF

  if front['description']
    description = front['description']
  else
    # the first 300 saves ~ 1 ms
    # regexes for stripping scripts, comments, styles and tags from the html
    description = feed_html[0..300].gsub(/<script.*?<\/script>/m, '').
                  gsub(/<!--.*?-->/m, '').gsub(/<style.*?<\/style>/m, '').
                  gsub(/<.*?>/m, '')
    if description.length > 160
      description = description[0..156] + '...'
    end
  end

  fskel(rel_path, title, page_html,
        header: header_rel,
        footer: footer_extra,
        comments: com_section,
        description: description)
  url = "#{DURL}/#{rel_path}"

  # following from https://creativecommons.org/choose,
  # with the addition of "unless otherwise noted", for js licenses.
  feed_copyright = <<-EOF
<a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by-sa/4.0/88x31.png" /></a><br /><span xmlns:dct="http://purl.org/dc/terms/" href="http://purl.org/dc/dcmitype/Text" property="dct:title" rel="dct:type">#{title}</span> by <a xmlns:cc="http://creativecommons.org/ns#" href="#{url}" property="cc:attributionName" rel="cc:attributionURL">Ian Kelling</a> unless otherwise noted is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">Creative Commons Attribution-ShareAlike 4.0 International License</a>.
  EOF

  feed_entry = <<EOF
<entry>
<title>#{title}</title>
<link rel="alternate" href="#{url}"/>
<id>#{url}</id>
<updated>#{date.to_datetime.rfc3339}</updated>
<content type="html" xml:lang="en-us" xml:base="#{DURL}/blog">
<![CDATA[
#{feed_html}
]]>
</content>
<rights>
#{feed_copyright}
</rights>
</entry>
EOF
  [feed_entry, blog_toc_entry]
end
300 end