Add anonymize function and improve docs
[bbdb-csv-import] / bbdb-csv-import.el
1 ;;; bbdb-csv-import.el --- import csv to bbdb version 3+
2
3 ;; Copyright (C) 2014 by Ian Kelling
4
5 ;; Maintainer: Ian Kelling <ian@iankelling.org>
6 ;; Author: Ian Kelling <ian@iankelling.org>
7 ;; Created: 1 Apr 2014
8 ;; Version: 1.1
9 ;; Package-Requires: ((pcsv "1.3.3") (dash "2.5.0") (bbdb "20140412.1949"))
10 ;; Keywords: csv, util, bbdb
11 ;; Homepage: https://gitlab.com/iankelling/bbdb-csv-import
12
13 ;; This program is free software; you can redistribute it and/or modify
14 ;; it under the terms of the GNU General Public License as published by
15 ;; the Free Software Foundation, either version 3 of the License, or
16 ;; (at your option) any later version.
17
18 ;; This program is distributed in the hope that it will be useful,
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 ;; GNU General Public License for more details.
22
23 ;; You should have received a copy of the GNU General Public License
24 ;; along with this program. If not, see <http://www.gnu.org/licenses/>.
25
26 ;;; Commentary:
27 ;;
28 ;; Importer of csv (comma separated value) text into Emacs’s bbdb database,
29 ;; version 3+. Works out of the box with csv exported from Thunderbird, Gmail,
30 ;; Linkedin, Outlook.com/hotmail, and probably others.
31 ;; Easily extensible to handle new formats.
32
33 ;;; Installation:
34 ;;
35 ;; If you installed this file with a package manager, just
36 ;;
37 ;; (require 'bbdb-csv-import)
38 ;;
39 ;; Else, note the min versions of dependencies above in "Package-Requires:",
40 ;; and load this file. The exact minimum bbdb version is unknown, something 3+.
41 ;;
42 ;;; Basic Usage:
43 ;;
44 ;; Back up bbdb by copying `bbdb-file' in case things go wrong.
45 ;;
46 ;; Simply M-x `bbdb-csv-import-buffer' or `bbdb-csv-import-file'.
47 ;; When called interactively, they prompt for file or buffer arguments.
48
49 ;;; Advanced usage / notes:
50 ;;
51 ;; Tested to work with thunderbird, gmail, linkedin,
52 ;; outlook.com/hotmail.com. For those programs, if it's exporter has an option
53 ;; of what kind of csv format, choose it's own native format if available, if
54 ;; not, choose an outlook compatible format. If you're exporting from some other
55 ;; program and its csv exporter claims outlook compatibility, there is a good
56 ;; chance it will work out of the box. If it doesn't, you can try to fix it as
57 ;; described below, or the maintainer will be happy to help, just anonymize your
58 ;; csv data using the M-x bbdb-csv-anonymize-current-buffer (make sure csv
59 ;; buffer is the current one) and attach it to an email.
60 ;;
61 ;; Duplicate contacts (according to email address) are skipped if
62 ;; bbdb-allow-duplicates is nil (default). Any duplicates found are echoed at
63 ;; the end of the import.
64
65 ;;; Custom mapping of csv fields
66 ;;
67 ;; If a field is handled wrong or you want to extend the program to handle a new
68 ;; kind of csv format, you need to setup a custom field mapping variable. It
69 ;; should not be too hard, but you can also Use the existing tables as an example. By default, we
70 ;; use a combination of most predefined mappings, and look for all of their
71 ;; fields, but it is probably best to avoid that kind of table when setting up
72 ;; your own as it is an unnecessary complexity in that case. If you have a
73 ;; problem with data from a supported export program, start by testing its
74 ;; specific mapping table instead of the combined one. Here is a handy template
75 ;; to set each of the predefined mapping tables if you would rather avoid the
76 ;; configure interface:
77 ;;
78 ;; (setq bbdb-csv-import-mapping-table bbdb-csv-import-combined)
79 ;; (setq bbdb-csv-import-mapping-table bbdb-csv-import-thunderbird)
80 ;; (setq bbdb-csv-import-mapping-table bbdb-csv-import-gmail)
81 ;; (setq bbdb-csv-import-mapping-table bbdb-csv-import-gmail-typed-email)
82 ;; (setq bbdb-csv-import-mapping-table bbdb-csv-import-linkedin)
83 ;; (setq bbdb-csv-import-mapping-table bbdb-csv-import-outlook-web)
84 ;; (setq bbdb-csv-import-mapping-table bbdb-csv-import-outlook-typed-email)
85 ;;
86 ;; The doc string for `bbdb-create-internal' may also be useful when creating a
87 ;; mapping table. Please send any new tables to the maintainer listed in this
88 ;; file. The maintainer should be able to help with any issues and may create a
89 ;; new mapping table given sample data.
90
91 ;;; Misc tips/troubleshooting:
92 ;;
93 ;; - ASynK looks promising for syncing bbdb/google/outlook.
94 ;; - The git repo contains a test folder with exactly tested version info and working
95 ;; test data. Software, and especially online services are prone to changing how they
96 ;; export. Please send feedback if you run into problems.
97 ;; - bbdb doesn't work if you delete the bbdb database file in
98 ;; the middle of an emacs session. If you want to empty the current bbdb database,
99 ;; do M-x bbdb then .* then C-u * d on the beginning of a record.
100 ;; - After changing a mapping table variable, don't forget to re-execute
101 ;; (setq bbdb-csv-import-mapping-table ...) so that it propagates.
102
103 ;;; Bugs, patches, discussion, feedback
104 ;;
105 ;; Patches and bugs are very welcome via https://gitlab.com/iankelling/bbdb-csv-import
106 ;;
107 ;; Questions, feedback, etc are very welcome via email to Ian Kelling
108 ;; <ian@iankelling.org>. I will add any useful questions, answers, etc. to this
109 ;; file. The scope/userbase of this project doesn't justify a mailing list, but if
110 ;; it ever did I would start a mailman or discourse to act as a mailing list
111 ;; and forum.
112
113 ;;; known bugs:
114 ;; * linkedin data contains ^M characters that need to be removed before import
115 ;; * blank lines are not ignored
116
117
118 ;;; Code:
119 (require 'pcsv)
120 (require 'dash)
121 (require 'bbdb-com)
122 (eval-when-compile (require 'cl))
123
124
125 (defconst bbdb-csv-import-thunderbird
126 '((:namelist "First Name" "Last Name")
127 (:name "Display Name")
128 (:aka "Nickname")
129 (:mail "Primary Email" "Secondary Email")
130 (:phone "Work Phone" "Home Phone" "Fax Number" "Pager Number" "Mobile Number")
131 (:address
132 (("home address"
133 (("Home Address" "Home Address 2")
134 "Home City" "Home State"
135 "Home ZipCode" "Home Country"))
136 ("work address"
137 (("Work Address" "Work Address 2")
138 "Work City" "Work State"
139 "Work ZipCode" "Work Country"))))
140 (:organization "Organization")
141 (:xfields "Web Page 1" "Web Page 2" "Birth Year" "Birth Month"
142 "Birth Day" "Department" "Custom 1" "Custom 2" "Custom 3"
143 "Custom 4" "Notes" "Job Title"))
144 "Thunderbird csv format")
145
146 (defconst bbdb-csv-import-linkedin
147 '((:namelist "First Name" "Middle Name" "Last Name")
148 (:affix "Suffix")
149 (:mail "E-mail Address" "E-mail 2 Address" "E-mail 3 Address")
150 (:phone
151 "Assistant's Phone" "Business Fax" "Business Phone"
152 "Business Phone 2" "Callback" "Car Phone"
153 "Company Main Phone" "Home Fax" "Home Phone"
154 "Home Phone 2" "ISDN" "Mobile Phone"
155 "Other Fax" "Other Phone" "Pager"
156 "Primary Phone" "Radio Phone" "TTY/TDD Phone" "Telex")
157 (:address
158 (("business address"
159 (("Business Street" "Business Street 2" "Business Street 3")
160 "Business City" "Business State"
161 "Business Postal Code" "Business Country"))
162 ("home address"
163 (("Home Street" "Home Street 2" "Home Street 3")
164 "Home City" "Home State"
165 "Home Postal Code" "Home Country"))
166 ("other address"
167 (("Other Street" "Other Street 2" "Other Street 3")
168 "Other City" "Other State"
169 "Other Postal Code" "Other Country"))))
170 (:organization "Company")
171 (:xfields
172 "Department" "Job Title" "Assistant's Name"
173 "Birthday" "Manager's Name" "Notes" "Other Address PO Box"
174 "Spouse" "Web Page" "Personal Web Page"))
175 "Linkedin export in the Outlook csv format.")
176
177
178 (defconst bbdb-csv-import-gmail
179 '((:namelist "Given Name" "Family Name")
180 (:name "Name")
181 (:affix "Name Prefix" "Name Suffix")
182 (:aka "Nickname")
183 (:mail (repeat "E-mail 1 - Value"))
184 (:phone (repeat ("Phone 1 - Type" "Phone 1 - Value")))
185 (:address
186 (repeat (("Address 1 - Type")
187 (("Address 1 - Street" "Address 1 - PO Box" "Address 1 - Extended Address")
188 "Address 1 - City" "Address 1 - Region"
189 "Address 1 - Postal Code" "Address 1 - Country"))))
190 (:organization (repeat "Organization 1 - Name"))
191 (:xfields
192 "Additional Name" "Yomi Name" "Given Name Yomi"
193 "Additional Name Yomi" "Family Name Yomi"
194 "Initials" "Short Name" "Maiden Name" "Birthday"
195 "Gender" "Location" "Billing Information"
196 "Directory Server" "Mileage" "Occupation"
197 "Hobby" "Sensitivity" "Priority"
198 "Subject" "Notes" "Group Membership"
199 ;; Gmail wouldn't let me add more than 1 organization in its web interface,
200 ;; but no harm in looking for multiple since the field name implies the
201 ;; possibility.
202 (repeat
203 "Organization 1 - Type" "Organization 1 - Yomi Name"
204 "Organization 1 - Title" "Organization 1 - Department"
205 "Organization 1 - Symbol" "Organization 1 - Location"
206 "Organization 1 - Job Description")
207 (repeat ("Relation 1 - Type" "Relation 1 - Value"))
208 (repeat ("Website 1 - Type" "Website 1 - Value"))
209 (repeat ("Event 1 - Type" "Event 1 - Value"))
210 (repeat ("Custom Field 1 - Type" "Custom Field 1 - Value"))))
211 "Gmail csv export format. Note some fields don't map perfectly,
212 feel free to modify them as you wish. \"PO Box\" and \"Extended
213 Address\" are added as additional address street lines if they
214 exist. Some special name fields are made custom instead of put in
215 name, which gets a single string. We map Gmail's \"Name Prefix\"
216 and \"Name Suffix\" to bbdb's affix (a list of strings). We lose
217 the prefix/suffix label, but those are usually obvious.")
218
219
220 (defconst bbdb-csv-import-gmail-typed-email
221 (append (car (last bbdb-csv-import-gmail)) '((repeat "E-mail 1 - Type")))
222 "Like the first Gmail mapping, but use custom fields to store
223 Gmail's email labels. This is separate because I assume most
224 people don't use those labels and using the default labels
225 would create useless custom fields.")
226
227 (defconst bbdb-csv-import-outlook-web
228 '((:namelist "First Name" "Middle Name" "Last Name")
229 (:mail "E-mail Address" "E-mail 2 Address" "E-mail 3 Address")
230 (:affix "Suffix")
231 (:phone
232 "Assistant's Phone" "Business Fax" "Business Phone"
233 "Business Phone 2" "Callback" "Car Phone"
234 "Company Main Phone" "Home Fax" "Home Phone"
235 "Home Phone 2" "ISDN" "Mobile Phone"
236 "Other Fax" "Other Phone" "Pager"
237 "Primary Phone" "Radio Phone" "TTY/TDD Phone" "Telex")
238 (:address
239 (("business address"
240 (("Business Street")
241 "Business City" "Business State"
242 "Business Postal Code" "Business Country"))
243 ("home address"
244 (("Home Street")
245 "Home City" "Home State"
246 "Home Postal Code" "Home Country"))
247 ("other address"
248 (("Other Street")
249 "Other City" "Other State"
250 "Other Postal Code" "Other Country"))))
251 (:organization "Company")
252 (:xfields
253 "Anniversary" "Family Name Yomi" "Given Name Yomi"
254 "Department" "Job Title" "Birthday" "Manager's Name" "Notes"
255 "Spouse" "Web Page"))
256 "Hotmail.com, outlook.com, live.com, etc.
257 Based on 'Export for outlook.com and other services',
258 not the export for Outlook 2010 and 2013.")
259
260 (defconst bbdb-csv-import-outlook-typed-email
261 (append (car (last bbdb-csv-import-outlook-web)) '((repeat "E-mail 1 - Type")))
262 "Like bbdb-csv-import-gmail-typed-email, but for outlook-web.
263 Adds email labels as custom fields.")
264
265
266 (defun bbdb-csv-import-flatten1 (list)
267 "Flatten LIST by 1 level."
268 (--reduce-from (if (consp it)
269 (-concat acc it)
270 (-snoc acc it))
271 nil list))
272
273
274 (defun bbdb-csv-import-merge-map (root)
275 "Combine two root mappings for making a combined mapping."
276 (bbdb-csv-import-flatten1
277 (list root
278 (-distinct
279 (append
280 (cdr (assoc root bbdb-csv-import-thunderbird))
281 (cdr (assoc root bbdb-csv-import-linkedin))
282 (cdr (assoc root bbdb-csv-import-gmail))
283 (cdr (assoc root bbdb-csv-import-outlook-web)))))))
284
285
286 (defconst bbdb-csv-import-combined
287 (list
288 ;; manually combined for proper ordering
289 '(:namelist "First Name" "Given Name" "Middle Name" "Last Name" "Family Name")
290 (bbdb-csv-import-merge-map :name)
291 (bbdb-csv-import-merge-map :affix)
292 (bbdb-csv-import-merge-map :aka)
293 (bbdb-csv-import-merge-map :mail)
294 (bbdb-csv-import-merge-map :phone)
295 ;; manually combined the addresses. Because it was easier.
296 '(:address
297 (repeat (("Address 1 - Type")
298 (("Address 1 - Street" "Address 1 - PO Box" "Address 1 - Extended Address")
299 "Address 1 - City" "Address 1 - Region"
300 "Address 1 - Postal Code" "Address 1 - Country")))
301 (("business address"
302 (("Business Street" "Business Street 2" "Business Street 3")
303 "Business City" "Business State"
304 "Business Postal Code" "Business Country"))
305 ("home address"
306 (("Home Street" "Home Street 2" "Home Street 3"
307 "Home Address" "Home Address 2")
308 "Home City" "Home State"
309 "Home Postal Code" "Home ZipCode" "Home Country"))
310 ("work address"
311 (("Work Address" "Work Address 2")
312 "Work City" "Work State"
313 "Work ZipCode" "Work Country"))
314 ("other address"
315 (("Other Street" "Other Street 2" "Other Street 3")
316 "Other City" "Other State"
317 "Other Postal Code" "Other Country"))))
318 (bbdb-csv-import-merge-map :organization)
319 (bbdb-csv-import-merge-map :xfields)))
320
321 (defcustom bbdb-csv-import-mapping-table bbdb-csv-import-combined
322 "The table which maps bbdb fields to csv fields. The default should work for most cases.
323 See the commentary section of this file for more details."
324 :group 'bbdb-csv-import
325 :type 'symbol)
326
327
328 (defun bbdb-csv-import-expand-repeats (csv-fields list)
329 "Return new list where elements from LIST in form (repeat elem1 ...)
330 become ((elem1 ...) [(elem2 ...)] ...) for as many repeating
331 numbered fields exist in the csv fields. elem can be a string or
332 a tree (a list with lists inside it)"
333 (cl-flet ((replace-num (num string)
334 ;; in STRING, replace all groups of numbers with NUM
335 (replace-regexp-in-string "[0-9]+"
336 (number-to-string num)
337 string)))
338 (--reduce-from
339 (if (not (and (consp it) (eq (car it) 'repeat)))
340 (cons it acc)
341 (setq it (cdr it))
342 (let* ((i 1)
343 (first-field (car (-flatten it))))
344 (setq acc (cons it acc))
345 ;; use first-field to test if there is another repetition.
346 (while (member
347 (replace-num (setq i (1+ i)) first-field)
348 csv-fields)
349 (cl-labels ((fun (cell)
350 (if (consp cell)
351 (mapcar #'fun cell)
352 (replace-num i cell))))
353 (setq acc (cons (fun it) acc))))
354 acc))
355 nil list)))
356
357 (defun bbdb-csv-import-map-bbdb (csv-fields root)
358 "ROOT is a root element from bbdb-csv-import-mapping-table. Get
359 the csv-fields for root in the mapping format, including variably
360 repeated ones. Flatten by one because repeated fields are put in
361 sub-lists, but after expanding them, that extra depth is no
362 longer useful. Small trade off: address mappings without 'repeat need
363 to be grouped in a list because they contain sublists that we
364 don't want flattened."
365 (bbdb-csv-import-flatten1
366 (bbdb-csv-import-expand-repeats
367 csv-fields
368 (cdr (assoc root bbdb-csv-import-mapping-table)))))
369
370 ;;;###autoload
371 (defun bbdb-csv-import-file (filename)
372 "Parse and import csv file FILENAME to bbdb."
373 (interactive "fCSV file containg contact data: ")
374 (bbdb-csv-import-buffer (find-file-noselect filename)))
375
376 ;;;###autoload
377 (defun bbdb-csv-import-buffer (&optional buffer-or-name)
378 "Parse and import csv BUFFER-OR-NAME to bbdb.
379 Argument is a buffer or name of a buffer.
380 Defaults to current buffer."
381 (interactive "bBuffer containing CSV contact data: ")
382 (when (null bbdb-csv-import-mapping-table)
383 (error "error: `bbdb-csv-import-mapping-table' is nil. Please set it and rerun."))
384 (let* ((csv-data (pcsv-parse-buffer (get-buffer (or buffer-or-name (current-buffer)))))
385 (csv-fields (car csv-data))
386 (csv-data (cdr csv-data))
387 (allow-dupes bbdb-allow-duplicates)
388 csv-record rd assoc-plus map-bbdb dupes)
389 ;; convenient function names
390 (fset 'rd 'bbdb-csv-import-rd)
391 (fset 'assoc-plus 'bbdb-csv-import-assoc-plus)
392 (fset 'map-bbdb (-partial 'bbdb-csv-import-map-bbdb csv-fields))
393 ;; we handle duplicates ourselves
394 (setq bbdb-allow-duplicates t)
395 ;; loop over the csv records
396 (while (setq csv-record (map 'list 'cons csv-fields (pop csv-data)))
397 (cl-flet*
398 ((ca (key list) (cdr (assoc key list))) ;; utility function
399 (rd-assoc (root)
400 ;; given ROOT, return a list of data, ignoring empty fields
401 (rd (lambda (elem) (assoc-plus elem csv-record)) (map-bbdb root)))
402 (assoc-expand (e)
403 ;; E = data-field-name | (field-name-field data-field)
404 ;; get data from the csv-record and return (field-name data) or nil.
405 (let ((data-name (if (consp e) (ca (car e) csv-record) e))
406 (data (assoc-plus (if (consp e) (cadr e) e) csv-record)))
407 (if data (list data-name data)))))
408 ;; set the arguments to bbdb-create-internal, then call it, the end.
409 (let ((name (let ((name (rd-assoc :namelist)))
410 ;; prioritize any combination of first middle last over :name
411 (if (>= (length name) 2)
412 (mapconcat 'identity name " ")
413 (car (rd-assoc :name)))))
414 (affix (rd-assoc :affix))
415 (aka (rd-assoc :aka))
416 (organization (rd-assoc :organization))
417 (mail (rd-assoc :mail))
418 (phone (rd 'vconcat (rd #'assoc-expand (map-bbdb :phone))))
419 (address (rd (lambda (e)
420
421 (let ((al (rd (lambda (elem) ;; al = address lines
422 (assoc-plus elem csv-record))
423 (caadr e)))
424 ;; to use bbdb-csv-import-combined, we can't mapcar
425 (address-data (--reduce-from (if (member it csv-fields)
426 (cons (ca it csv-record) acc)
427 acc)
428 nil (cdadr e)))
429 (elem-name (car e)))
430 (setq al (nreverse al))
431 (setq address-data (nreverse address-data))
432 ;; make it a list of at least 2 elements
433 (setq al (append al
434 (-repeat (- 2 (length al)) "")))
435 (when (consp elem-name)
436 (setq elem-name (ca (car elem-name) csv-record)))
437
438 ;; determine if non-nil and put together the minimum set
439 (when (or (not (--all? (zerop (length it)) address-data))
440 (not (--all? (zerop (length it)) al)))
441 (when (> 2 (length al))
442 (setcdr (max 2 (nthcdr (--find-last-index (not (null it))
443 al)
444 al)) nil))
445 (vconcat (list elem-name) (list al) address-data))))
446 (map-bbdb :address)))
447 (xfields (rd (lambda (list)
448 (let ((e (car list)))
449 (while (string-match "-" e)
450 (setq e (replace-match "" nil nil e)))
451 (while (string-match " +" e)
452 (setq e (replace-match "-" nil nil e)))
453 (setq e (make-symbol (downcase e)))
454 (cons e (cadr list)))) ;; change from (a b) to (a . b)
455 (rd #'assoc-expand (map-bbdb :xfields)))))
456 ;; we copy and subvert bbdb's duplicate detection instead of catching
457 ;; errors so that we don't interfere with other errors, and can print
458 ;; them nicely at the end.
459 (let (found-dupe)
460 (dolist (elt mail)
461 (when (bbdb-gethash elt '(mail))
462 (push elt dupes)
463 (setq found-dupe t)))
464 (when (or allow-dupes (not found-dupe))
465 (bbdb-create-internal name affix aka organization mail phone address xfields t))))))
466 (when dupes (if allow-dupes
467 (message "Warning, contacts with duplicate email addresses were imported:\n%s" dupes)
468 (message "Skipped contacts with duplicate email addresses:\n%s" dupes)))
469 (setq bbdb-allow-duplicates allow-dupes)))
470
471 (defun bbdb-csv-import-rd (func list)
472 "like mapcar but don't build nil results into the resulting list"
473 (--reduce-from (let ((funcreturn (funcall func it)))
474 (if funcreturn
475 (cons funcreturn acc)
476 acc))
477 nil list))
478
479 (defun bbdb-csv-import-assoc-plus (key list)
480 "Like (cdr assoc ...) but turn an empty string result to nil."
481 (let ((result (cdr (assoc key list))))
482 (when (not (string= "" result))
483 result)))
484
485 (defun bbdb-csv-anonymize-current-buffer ()
486 (interactive)
487 "Anonymize the current buffer which contains csv data.
488 The first line should contain header names."
489 (goto-line 2)
490 (while (re-search-forward "\\w")
491 (delete-char -1)
492 (insert (number-to-string (random 9)))))
493
494
495 (provide 'bbdb-csv-import)
496
497 ;;; bbdb-csv-import.el ends here