Use the new dom.el accessors in shr and eww

* net/shr.el: Ditto.

* net/eww.el: Use the new dom.el accessors throughout.
This commit is contained in:
Lars Magne Ingebrigtsen
2014-11-26 19:41:13 +01:00
parent 115178cd46
commit d9ba097fe4
2 changed files with 327 additions and 396 deletions

View File

@@ -406,38 +406,38 @@ See the `eww-search-prefix' variable for the search engine used."
(setq eww-history-position 0)
(eww-update-header-line-format))))
(defun eww-handle-link (cont)
(let* ((rel (assq :rel cont))
(href (assq :href cont))
(where (assoc
;; The text associated with :rel is case-insensitive.
(if rel (downcase (cdr rel)))
'(("next" . :next)
;; Texinfo uses "previous", but HTML specifies
;; "prev", so recognize both.
("previous" . :previous)
("prev" . :previous)
;; HTML specifies "start" but also "contents",
;; and Gtk seems to use "home". Recognize
;; them all; but store them in different
;; variables so that we can readily choose the
;; "best" one.
("start" . :start)
("home" . :home)
("contents" . :contents)
("up" . up)))))
(defun eww-handle-link (dom)
(let* ((rel (dom-attr dom 'rel))
(href (dom-attr dom 'href))
(where (assoc
;; The text associated with :rel is case-insensitive.
(if rel (downcase rel))
'(("next" . :next)
;; Texinfo uses "previous", but HTML specifies
;; "prev", so recognize both.
("previous" . :previous)
("prev" . :previous)
;; HTML specifies "start" but also "contents",
;; and Gtk seems to use "home". Recognize
;; them all; but store them in different
;; variables so that we can readily choose the
;; "best" one.
("start" . :start)
("home" . :home)
("contents" . :contents)
("up" . up)))))
(and href
where
(plist-put eww-data (cdr where) (cdr href)))))
(plist-put eww-data (cdr where) href))))
(defun eww-tag-link (cont)
(eww-handle-link cont)
(shr-generic cont))
(defun eww-tag-link (dom)
(eww-handle-link dom)
(shr-generic dom))
(defun eww-tag-a (cont)
(eww-handle-link cont)
(defun eww-tag-a (dom)
(eww-handle-link dom)
(let ((start (point)))
(shr-tag-a cont)
(shr-tag-a dom)
(put-text-property start (point) 'keymap eww-link-keymap)))
(defun eww-update-header-line-format ()
@@ -452,25 +452,24 @@ See the `eww-search-prefix' variable for the search engine used."
(?t . ,(or (plist-get eww-data :title) ""))))))
(setq header-line-format nil)))
(defun eww-tag-title (cont)
(defun eww-tag-title (dom)
(let ((title ""))
(dolist (sub cont)
(when (eq (car sub) 'text)
(setq title (concat title (cdr sub)))))
(dolist (sub (dom-children dom))
(when (stringp sub)
(setq title (concat title sub))))
(plist-put eww-data :title
(replace-regexp-in-string
"^ \\| $" ""
(replace-regexp-in-string "[ \t\r\n]+" " " title))))
(eww-update-header-line-format))
(defun eww-tag-body (cont)
(defun eww-tag-body (dom)
(let* ((start (point))
(fgcolor (cdr (or (assq :fgcolor cont)
(assq :text cont))))
(bgcolor (cdr (assq :bgcolor cont)))
(fgcolor (or (dom-attr dom 'fgcolor) (dom-attr dom 'text)))
(bgcolor (dom-attr dom 'bgcolor))
(shr-stylesheet (list (cons 'color fgcolor)
(cons 'background-color bgcolor))))
(shr-generic cont)
(shr-generic dom)
(shr-colorize-region start (point) fgcolor bgcolor)))
(defun eww-display-raw (buffer &optional encode)
@@ -550,18 +549,16 @@ contains the main textual portion, leaving out navigation menus and
the like."
(interactive)
(let* ((old-data eww-data)
(dom (shr-transform-dom
(with-temp-buffer
(insert (plist-get old-data :source))
(condition-case nil
(decode-coding-region (point-min) (point-max) 'utf-8)
(coding-system-error nil))
(libxml-parse-html-region (point-min) (point-max))))))
(dom (with-temp-buffer
(insert (plist-get old-data :source))
(condition-case nil
(decode-coding-region (point-min) (point-max) 'utf-8)
(coding-system-error nil))
(libxml-parse-html-region (point-min) (point-max)))))
(eww-score-readability dom)
(eww-save-history)
(eww-display-html nil nil
(shr-retransform-dom
(eww-highest-readability dom))
(eww-highest-readability dom)
nil (current-buffer))
(dolist (elem '(:source :url :title :next :previous :up))
(plist-put eww-data elem (plist-get old-data elem)))
@@ -570,41 +567,35 @@ the like."
(defun eww-score-readability (node)
(let ((score -1))
(cond
((memq (car node) '(script head comment))
((memq (dom-tag node) '(script head comment))
(setq score -2))
((eq (car node) 'meta)
((eq (dom-tag node) 'meta)
(setq score -1))
((eq (car node) 'img)
((eq (dom-tag node) 'img)
(setq score 2))
((eq (car node) 'a)
(setq score (- (length (split-string
(or (cdr (assoc 'text (cdr node))) ""))))))
((eq (dom-tag node) 'a)
(setq score (- (length (split-string (dom-text node))))))
(t
(dolist (elem (cdr node))
(cond
((and (stringp (cdr elem))
(eq (car elem) 'text))
(setq score (+ score (length (split-string (cdr elem))))))
((consp (cdr elem))
(dolist (elem (dom-children node))
(if (stringp elem)
(setq score (+ score (length (split-string elem))))
(setq score (+ score
(or (cdr (assoc :eww-readability-score (cdr elem)))
(eww-score-readability elem)))))))))
(eww-score-readability elem))))))))
;; Cache the score of the node to avoid recomputing all the time.
(setcdr node (cons (cons :eww-readability-score score) (cdr node)))
(dom-set-attribute node :eww-readability-score score)
score))
(defun eww-highest-readability (node)
(let ((result node)
highest)
(dolist (elem (cdr node))
(when (and (consp (cdr elem))
(> (or (cdr (assoc
:eww-readability-score
(setq highest
(eww-highest-readability elem))))
most-negative-fixnum)
(or (cdr (assoc :eww-readability-score (cdr result)))
most-negative-fixnum)))
(dolist (elem (dom-children node))
(when (> (or (dom-attr
(setq highest (eww-highest-readability elem))
:eww-readability-score)
most-negative-fixnum)
(or (dom-attr (cdr result) :eww-readability-score)
most-negative-fixnum))
(setq result highest)))
result))
@@ -864,13 +855,12 @@ appears in a <link> or <a> tag."
(1- (next-single-property-change
(point) 'eww-form nil (point-max))))
(defun eww-tag-form (cont)
(let ((eww-form
(list (assq :method cont)
(assq :action cont)))
(defun eww-tag-form (dom)
(let ((eww-form (list (cons :method (dom-attr dom 'method))
(cons :action (dom-attr dom 'action))))
(start (point)))
(shr-ensure-paragraph)
(shr-generic cont)
(shr-generic dom)
(unless (bolp)
(insert "\n"))
(insert "\n")
@@ -878,9 +868,9 @@ appears in a <link> or <a> tag."
(put-text-property start (1+ start)
'eww-form eww-form))))
(defun eww-form-submit (cont)
(defun eww-form-submit (dom)
(let ((start (point))
(value (cdr (assq :value cont))))
(value (dom-attr dom 'value)))
(setq value
(if (zerop (length value))
"Submit"
@@ -891,28 +881,28 @@ appears in a <link> or <a> tag."
(list :eww-form eww-form
:value value
:type "submit"
:name (cdr (assq :name cont))))
:name (dom-attr dom 'name)))
(put-text-property start (point) 'keymap eww-submit-map)
(insert " ")))
(defun eww-form-checkbox (cont)
(defun eww-form-checkbox (dom)
(let ((start (point)))
(if (cdr (assq :checked cont))
(if (dom-attr dom 'checked)
(insert eww-form-checkbox-selected-symbol)
(insert eww-form-checkbox-symbol))
(add-face-text-property start (point) 'eww-form-checkbox)
(put-text-property start (point) 'eww-form
(list :eww-form eww-form
:value (cdr (assq :value cont))
:type (downcase (cdr (assq :type cont)))
:checked (cdr (assq :checked cont))
:name (cdr (assq :name cont))))
:value (dom-attr dom 'value)
:type (downcase (dom-attr dom 'type))
:checked (dom-attr dom 'checked)
:name (dom-attr dom 'name)))
(put-text-property start (point) 'keymap eww-checkbox-map)
(insert " ")))
(defun eww-form-file (cont)
(defun eww-form-file (dom)
(let ((start (point))
(value (cdr (assq :value cont))))
(value (dom-attr dom 'value)))
(setq value
(if (zerop (length value))
" No file selected"
@@ -922,9 +912,9 @@ appears in a <link> or <a> tag."
(insert value)
(put-text-property start (point) 'eww-form
(list :eww-form eww-form
:value (cdr (assq :value cont))
:type (downcase (cdr (assq :type cont)))
:name (cdr (assq :name cont))))
:value (dom-attr dom 'value)
:type (downcase (dom-attr dom 'type))
:name (dom-attr dom 'name)))
(put-text-property start (point) 'keymap eww-submit-file)
(insert " ")))
@@ -938,16 +928,13 @@ appears in a <link> or <a> tag."
(eww-update-field filename (length "Browse"))
(plist-put input :filename filename))))
(defun eww-form-text (cont)
(defun eww-form-text (dom)
(let ((start (point))
(type (downcase (or (cdr (assq :type cont))
"text")))
(value (or (cdr (assq :value cont)) ""))
(width (string-to-number
(or (cdr (assq :size cont))
"40")))
(readonly-property (if (or (cdr (assq :disabled cont))
(cdr (assq :readonly cont)))
(type (downcase (or (dom-attr dom 'type) "text")))
(value (or (dom-attr dom 'value) ""))
(width (string-to-number (or (dom-attr dom 'size) "40")))
(readonly-property (if (or (dom-attr dom 'disabled)
(dom-attr dom 'readonly))
'read-only
'inhibit-read-only)))
(insert value)
@@ -961,7 +948,7 @@ appears in a <link> or <a> tag."
(list :eww-form eww-form
:value value
:type type
:name (cdr (assq :name cont))))
:name (dom-attr dom 'name)))
(insert " ")))
(defconst eww-text-input-types '("text" "password" "textarea"
@@ -1014,15 +1001,11 @@ See URL `https://developer.mozilla.org/en-US/docs/Web/HTML/Element/Input'.")
(put-text-property start (+ start (length value))
'display (make-string (length value) ?*))))))))
(defun eww-tag-textarea (cont)
(defun eww-tag-textarea (dom)
(let ((start (point))
(value (or (cdr (assq :value cont)) ""))
(lines (string-to-number
(or (cdr (assq :rows cont))
"10")))
(width (string-to-number
(or (cdr (assq :cols cont))
"10")))
(value (or (dom-attr dom 'value) ""))
(lines (string-to-number (or (dom-attr dom 'rows) "10")))
(width (string-to-number (or (dom-attr dom 'cols) "10")))
end)
(shr-ensure-newline)
(insert value)
@@ -1047,23 +1030,22 @@ See URL `https://developer.mozilla.org/en-US/docs/Web/HTML/Element/Input'.")
(list :eww-form eww-form
:value value
:type "textarea"
:name (cdr (assq :name cont))))))
:name (dom-attr dom 'name)))))
(defun eww-tag-input (cont)
(let ((type (downcase (or (cdr (assq :type cont))
"text")))
(defun eww-tag-input (dom)
(let ((type (downcase (or (dom-attr dom 'type) "text")))
(start (point)))
(cond
((or (equal type "checkbox")
(equal type "radio"))
(eww-form-checkbox cont))
(eww-form-checkbox dom))
((equal type "file")
(eww-form-file cont))
(eww-form-file dom))
((equal type "submit")
(eww-form-submit cont))
(eww-form-submit dom))
((equal type "hidden")
(let ((form eww-form)
(name (cdr (assq :name cont))))
(name (dom-attr dom 'name)))
;; Don't add <input type=hidden> elements repeatedly.
(while (and form
(or (not (consp (car form)))
@@ -1075,34 +1057,33 @@ See URL `https://developer.mozilla.org/en-US/docs/Web/HTML/Element/Input'.")
(nconc eww-form (list
(list 'hidden
:name name
:value (cdr (assq :value cont))))))))
:value (dom-attr dom 'value)))))))
(t
(eww-form-text cont)))
(eww-form-text dom)))
(unless (= start (point))
(put-text-property start (1+ start) 'help-echo "Input field"))))
(defun eww-tag-select (cont)
(defun eww-tag-select (dom)
(shr-ensure-paragraph)
(let ((menu (list :name (cdr (assq :name cont))
(let ((menu (list :name (dom-attr dom 'name)
:eww-form eww-form))
(options nil)
(start (point))
(max 0)
opelem)
(if (eq (car (car cont)) 'optgroup)
(dolist (groupelem cont)
(unless (cdr (assq :disabled (cdr groupelem)))
(setq opelem (append opelem (cdr (cdr groupelem))))))
(setq opelem cont))
(if (eq (dom-tag dom) 'optgroup)
(dolist (groupelem (dom-children dom))
(unless (dom-attr groupelem 'disabled)
(setq opelem (append opelem (list groupelem)))))
(setq opelem (list dom)))
(dolist (elem opelem)
(when (eq (car elem) 'option)
(when (cdr (assq :selected (cdr elem)))
(nconc menu (list :value
(cdr (assq :value (cdr elem))))))
(let ((display (or (cdr (assq 'text (cdr elem))) "")))
(when (eq (dom-tag elem) 'option)
(when (dom-attr elem 'selected)
(nconc menu (list :value (dom-attr elem 'value))))
(let ((display (dom-text elem)))
(setq max (max max (length display)))
(push (list 'item
:value (cdr (assq :value (cdr elem)))
:value (dom-attr elem 'value)
:display display)
options))))
(when options
@@ -1302,8 +1283,7 @@ See URL `https://developer.mozilla.org/en-US/docs/Web/HTML/Element/Input'.")
(eww-browse-url
(concat
(if (cdr (assq :action form))
(shr-expand-url (cdr (assq :action form))
(plist-get eww-data :url))
(shr-expand-url (cdr (assq :action form)) (plist-get eww-data :url))
(plist-get eww-data :url))
"?"
(mm-url-encode-www-form-urlencoded values))))))

View File

@@ -33,6 +33,8 @@
(eval-when-compile (require 'cl))
(eval-when-compile (require 'url)) ;For url-filename's setf handler.
(require 'browse-url)
(require 'subr-x)
(require 'dom)
(defgroup shr nil
"Simple HTML Renderer"
@@ -205,7 +207,7 @@ DOM should be a parse tree as generated by
(shr-depth 0)
(shr-warning nil)
(shr-internal-width (or shr-width (1- (window-width)))))
(shr-descend (shr-transform-dom dom))
(shr-descend dom)
(shr-remove-trailing-whitespace start (point))
(when shr-warning
(message "%s" shr-warning))))
@@ -366,53 +368,20 @@ size, and full-buffer size."
;;; Utility functions.
(defun shr-transform-dom (dom)
(let ((result (list (pop dom))))
(dolist (arg (pop dom))
(push (cons (intern (concat ":" (symbol-name (car arg))) obarray)
(cdr arg))
result))
(dolist (sub dom)
(if (stringp sub)
(push (cons 'text sub) result)
(push (shr-transform-dom sub) result)))
(nreverse result)))
(defun shr-retransform-dom (dom)
"Transform the shr DOM back into the libxml DOM."
(let ((tag (car dom))
(attributes nil)
(sub-nodes nil))
(dolist (elem (cdr dom))
(cond
((and (stringp (cdr elem))
(eq (car elem) 'text))
(push (cdr elem) sub-nodes))
((not (listp (cdr elem)))
(push (cons (intern (substring (symbol-name (car elem)) 1) obarray)
(cdr elem))
attributes))
(t
(push (shr-retransform-dom elem) sub-nodes))))
(append (list tag (nreverse attributes))
(nreverse sub-nodes))))
(defsubst shr-generic (cont)
(dolist (sub cont)
(cond
((eq (car sub) 'text)
(shr-insert (cdr sub)))
((listp (cdr sub))
(shr-descend sub)))))
(defsubst shr-generic (dom)
(dolist (sub (dom-children dom))
(if (stringp sub)
(shr-insert sub)
(shr-descend sub))))
(defun shr-descend (dom)
(let ((function
(or
;; Allow other packages to override (or provide) rendering
;; of elements.
(cdr (assq (car dom) shr-external-rendering-functions))
(intern (concat "shr-tag-" (symbol-name (car dom))) obarray)))
(style (cdr (assq :style (cdr dom))))
(cdr (assq (dom-tag dom) shr-external-rendering-functions))
(intern (concat "shr-tag-" (symbol-name (dom-tag dom))) obarray)))
(style (dom-attr dom 'style))
(shr-stylesheet shr-stylesheet)
(shr-depth (1+ shr-depth))
(start (point)))
@@ -427,10 +396,10 @@ size, and full-buffer size."
;; If we have a display:none, then just ignore this part of the DOM.
(unless (equal (cdr (assq 'display shr-stylesheet)) "none")
(if (fboundp function)
(funcall function (cdr dom))
(shr-generic (cdr dom)))
(funcall function dom)
(shr-generic dom))
(when (and shr-target-id
(equal (cdr (assq :id (cdr dom))) shr-target-id))
(equal (dom-attr dom 'id) shr-target-id))
;; If the element was empty, we don't have anything to put the
;; anchor on. So just insert a dummy character.
(when (= start (point))
@@ -684,9 +653,9 @@ size, and full-buffer size."
(when (> shr-indentation 0)
(insert (make-string shr-indentation ? ))))
(defun shr-fontize-cont (cont &rest types)
(defun shr-fontize-dom (dom &rest types)
(let (shr-start)
(shr-generic cont)
(shr-generic dom)
(dolist (type types)
(shr-add-font (or shr-start (point)) (point) type))))
@@ -879,8 +848,7 @@ Return a string with image data."
(when (eq content-type 'image/svg+xml)
(setq data
(shr-dom-to-xml
(shr-transform-dom
(libxml-parse-xml-region (point) (point-max))))))
(libxml-parse-xml-region (point) (point-max)))))
(list data content-type)))
(defun shr-image-displayer (content-function)
@@ -903,9 +871,9 @@ START, and END. Note that START and END should be markers."
(list (current-buffer) start end)
t t)))))
(defun shr-heading (cont &rest types)
(defun shr-heading (dom &rest types)
(shr-ensure-paragraph)
(apply #'shr-fontize-cont cont types)
(apply #'shr-fontize-dom dom types)
(shr-ensure-paragraph))
(defun shr-urlify (start url &optional title)
@@ -1014,105 +982,98 @@ ones, in case fg and bg are nil."
;;; Tag-specific rendering rules.
(defun shr-tag-body (cont)
(defun shr-tag-body (dom)
(let* ((start (point))
(fgcolor (cdr (or (assq :fgcolor cont)
(assq :text cont))))
(bgcolor (cdr (assq :bgcolor cont)))
(fgcolor (or (dom-attr dom 'fgcolor) (dom-attr dom 'text)))
(bgcolor (dom-attr dom 'bgcolor))
(shr-stylesheet (list (cons 'color fgcolor)
(cons 'background-color bgcolor))))
(shr-generic cont)
(shr-generic dom)
(shr-colorize-region start (point) fgcolor bgcolor)))
(defun shr-tag-style (_cont)
(defun shr-tag-style (_dom)
)
(defun shr-tag-script (_cont)
(defun shr-tag-script (_dom)
)
(defun shr-tag-comment (_cont)
(defun shr-tag-comment (_dom)
)
(defun shr-dom-to-xml (dom)
"Convert DOM into a string containing the xml representation."
(let ((arg " ")
(text "")
url)
(dolist (sub (cdr dom))
(cond
((listp (cdr sub))
;; Ignore external image definitions if required.
;; <image xlink:href="http://TRACKING_URL/"/>
(when (or (not (eq (car sub) 'image))
(not (setq url (cdr (assq ':xlink:href (cdr sub)))))
(not shr-blocked-images)
(not (string-match shr-blocked-images url)))
(setq text (concat text (shr-dom-to-xml sub)))))
((eq (car sub) 'text)
(setq text (concat text (cdr sub))))
(t
(setq arg (concat arg (format "%s=\"%s\" "
(substring (symbol-name (car sub)) 1)
(cdr sub)))))))
(format "<%s%s>%s</%s>"
(car dom)
(substring arg 0 (1- (length arg)))
text
(car dom))))
(with-temp-buffer
(shr-dom-print dom)
(buffer-string)))
(defun shr-tag-svg (cont)
(defun shr-dom-print (dom)
"Convert DOM into a string containing the xml representation."
(insert (format "<%s" (dom-tag dom)))
(dolist (attr (dom-attributes dom))
;; Ignore attributes that start with a colon.
(unless (= (aref (format "%s" (car attr)) 0) ?:)
(insert (format " %s=\"%s\"" (car attr) (cdr attr)))))
(insert ">")
(let (url)
(dolist (elem (dom-children dom))
(when (or (not (eq (dom-tag elem) 'image))
(not (setq url (dom-attr elem ':xlink:href)))
(not shr-blocked-images)
(not (string-match shr-blocked-images url)))
(insert " ")
(shr-dom-print elem))))
(insert (format "</%s>" (dom-tag dom))))
(defun shr-tag-svg (dom)
(when (and (image-type-available-p 'svg)
(not shr-inhibit-images))
(funcall shr-put-image-function
(shr-dom-to-xml (cons 'svg cont))
"SVG Image")))
(funcall shr-put-image-function (shr-dom-to-xml dom) "SVG Image")))
(defun shr-tag-sup (cont)
(defun shr-tag-sup (dom)
(let ((start (point)))
(shr-generic cont)
(shr-generic dom)
(put-text-property start (point) 'display '(raise 0.5))))
(defun shr-tag-sub (cont)
(defun shr-tag-sub (dom)
(let ((start (point)))
(shr-generic cont)
(shr-generic dom)
(put-text-property start (point) 'display '(raise -0.5))))
(defun shr-tag-label (cont)
(shr-generic cont)
(defun shr-tag-label (dom)
(shr-generic dom)
(shr-ensure-paragraph))
(defun shr-tag-p (cont)
(defun shr-tag-p (dom)
(shr-ensure-paragraph)
(shr-indent)
(shr-generic cont)
(shr-generic dom)
(shr-ensure-paragraph))
(defun shr-tag-div (cont)
(defun shr-tag-div (dom)
(shr-ensure-newline)
(shr-indent)
(shr-generic cont)
(shr-generic dom)
(shr-ensure-newline))
(defun shr-tag-s (cont)
(shr-fontize-cont cont 'shr-strike-through))
(defun shr-tag-s (dom)
(shr-fontize-dom dom 'shr-strike-through))
(defun shr-tag-del (cont)
(shr-fontize-cont cont 'shr-strike-through))
(defun shr-tag-del (dom)
(shr-fontize-dom dom 'shr-strike-through))
(defun shr-tag-b (cont)
(shr-fontize-cont cont 'bold))
(defun shr-tag-b (dom)
(shr-fontize-dom dom 'bold))
(defun shr-tag-i (cont)
(shr-fontize-cont cont 'italic))
(defun shr-tag-i (dom)
(shr-fontize-dom dom 'italic))
(defun shr-tag-em (cont)
(shr-fontize-cont cont 'italic))
(defun shr-tag-em (dom)
(shr-fontize-dom dom 'italic))
(defun shr-tag-strong (cont)
(shr-fontize-cont cont 'bold))
(defun shr-tag-strong (dom)
(shr-fontize-dom dom 'bold))
(defun shr-tag-u (cont)
(shr-fontize-cont cont 'underline))
(defun shr-tag-u (dom)
(shr-fontize-dom dom 'underline))
(defun shr-parse-style (style)
(when style
@@ -1134,20 +1095,19 @@ ones, in case fg and bg are nil."
plist)))))
plist)))
(defun shr-tag-base (cont)
(let ((base (cdr (assq :href cont))))
(when base
(setq shr-base (shr-parse-base base))))
(shr-generic cont))
(defun shr-tag-base (dom)
(when-let (base (dom-attr dom 'href))
(setq shr-base (shr-parse-base base)))
(shr-generic dom))
(defun shr-tag-a (cont)
(let ((url (cdr (assq :href cont)))
(title (cdr (assq :title cont)))
(defun shr-tag-a (dom)
(let ((url (dom-attr dom 'href))
(title (dom-attr dom 'title))
(start (point))
shr-start)
(shr-generic cont)
(shr-generic dom)
(when (and shr-target-id
(equal (cdr (assq :name cont)) shr-target-id))
(equal (dom-attr dom 'name) shr-target-id))
;; We have a zero-length <a name="foo"> element, so just
;; insert... something.
(when (= start (point))
@@ -1158,33 +1118,33 @@ ones, in case fg and bg are nil."
(not shr-inhibit-decoration))
(shr-urlify (or shr-start start) (shr-expand-url url) title))))
(defun shr-tag-object (cont)
(defun shr-tag-object (dom)
(unless shr-inhibit-images
(let ((start (point))
url multimedia image)
(dolist (elem cont)
(when-let (type (dom-attr dom 'type))
(when (string-match "\\`image/svg" type)
(setq url (dom-attr dom 'data)
image t)))
(dolist (child (dom-children dom))
(cond
((eq (car elem) 'embed)
(setq url (or url (cdr (assq :src (cdr elem))))
((eq (dom-tag child) 'embed)
(setq url (or url (dom-attr child 'src))
multimedia t))
((and (eq (car elem) 'param)
(equal (cdr (assq :name (cdr elem))) "movie"))
(setq url (or url (cdr (assq :value (cdr elem))))
multimedia t))
((and (eq (car elem) :type)
(string-match "\\`image/svg" (cdr elem)))
(setq url (cdr (assq :data cont))
image t))))
((and (eq (dom-tag child) 'param)
(equal (dom-attr child 'name) "movie"))
(setq url (or url (dom-attr child 'value))
multimedia t))))
(when url
(cond
(image
(shr-tag-img cont url)
(setq cont nil))
(shr-tag-img dom url)
(setq dom nil))
(multimedia
(shr-insert " [multimedia] ")
(shr-urlify start (shr-expand-url url)))))
(when cont
(shr-generic cont)))))
(when dom
(shr-generic dom)))))
(defcustom shr-prefer-media-type-alist '(("webm" . 1.0)
("ogv" . 1.0)
@@ -1203,10 +1163,10 @@ url if no type is specified. The value should be a float in the range 0.0 to
(defun shr--get-media-pref (elem)
"Determine the preference for ELEM.
The preference is a float determined from `shr-prefer-media-type'."
(let ((type (cdr (assq :type elem)))
(let ((type (dom-attr elem 'type))
(p 0.0))
(unless type
(setq type (cdr (assq :src elem))))
(setq type (dom-attr elem 'src)))
(when type
(dolist (pref shr-prefer-media-type-alist)
(when (and
@@ -1215,61 +1175,61 @@ The preference is a float determined from `shr-prefer-media-type'."
(setq p (cdr pref)))))
p))
(defun shr--extract-best-source (cont &optional url pref)
"Extract the best `:src' property from <source> blocks in CONT."
(defun shr--extract-best-source (dom &optional url pref)
"Extract the best `:src' property from <source> blocks in DOM."
(setq pref (or pref -1.0))
(let (new-pref)
(dolist (elem cont)
(when (and (eq (car elem) 'source)
(dolist (elem (dom-children dom))
(when (and (eq (dom-tag elem) 'source)
(< pref
(setq new-pref
(shr--get-media-pref elem))))
(setq pref new-pref
url (cdr (assq :src elem)))
url (dom-attr elem 'src))
;; libxml's html parser isn't HTML5 compliant and non terminated
;; source tags might end up as children. So recursion it is...
(dolist (child (cdr elem))
(when (eq (car child) 'source)
(dolist (child (dom-children elem))
(when (eq (dom-tag child) 'source)
(let ((ret (shr--extract-best-source (list child) url pref)))
(when (< pref (cdr ret))
(setq url (car ret)
pref (cdr ret)))))))))
(cons url pref))
(defun shr-tag-video (cont)
(let ((image (cdr (assq :poster cont)))
(url (cdr (assq :src cont)))
(defun shr-tag-video (dom)
(let ((image (dom-attr dom 'poster))
(url (dom-attr dom 'src))
(start (point)))
(unless url
(setq url (car (shr--extract-best-source cont))))
(setq url (car (shr--extract-best-source dom))))
(if image
(shr-tag-img nil image)
(shr-insert " [video] "))
(shr-urlify start (shr-expand-url url))))
(defun shr-tag-audio (cont)
(let ((url (cdr (assq :src cont)))
(defun shr-tag-audio (dom)
(let ((url (dom-attr dom 'src))
(start (point)))
(unless url
(setq url (car (shr--extract-best-source cont))))
(setq url (car (shr--extract-best-source dom))))
(shr-insert " [audio] ")
(shr-urlify start (shr-expand-url url))))
(defun shr-tag-img (cont &optional url)
(defun shr-tag-img (dom &optional url)
(when (or url
(and cont
(> (length (cdr (assq :src cont))) 0)))
(and dom
(> (length (dom-attr dom 'src)) 0)))
(when (and (> (current-column) 0)
(not (eq shr-state 'image)))
(insert "\n"))
(let ((alt (cdr (assq :alt cont)))
(url (shr-expand-url (or url (cdr (assq :src cont))))))
(let ((alt (dom-attr dom 'alt))
(url (shr-expand-url (or url (dom-attr dom 'src)))))
(let ((start (point-marker)))
(when (zerop (length alt))
(setq alt "*"))
(cond
((or (member (cdr (assq :height cont)) '("0" "1"))
(member (cdr (assq :width cont)) '("0" "1")))
((or (member (dom-attr dom 'height) '("0" "1"))
(member (dom-attr dom 'width) '("0" "1")))
;; Ignore zero-sized or single-pixel images.
)
((and (not shr-inhibit-images)
@@ -1315,52 +1275,51 @@ The preference is a float determined from `shr-prefer-media-type'."
(put-text-property start (point) 'image-displayer
(shr-image-displayer shr-content-function))
(put-text-property start (point) 'help-echo
(or (cdr (assq :title cont))
alt)))
(or (dom-attr dom 'title) alt)))
(setq shr-state 'image)))))
(defun shr-tag-pre (cont)
(defun shr-tag-pre (dom)
(let ((shr-folding-mode 'none))
(shr-ensure-newline)
(shr-indent)
(shr-generic cont)
(shr-generic dom)
(shr-ensure-newline)))
(defun shr-tag-blockquote (cont)
(defun shr-tag-blockquote (dom)
(shr-ensure-paragraph)
(shr-indent)
(let ((shr-indentation (+ shr-indentation 4)))
(shr-generic cont))
(shr-generic dom))
(shr-ensure-paragraph))
(defun shr-tag-dl (cont)
(defun shr-tag-dl (dom)
(shr-ensure-paragraph)
(shr-generic cont)
(shr-generic dom)
(shr-ensure-paragraph))
(defun shr-tag-dt (cont)
(defun shr-tag-dt (dom)
(shr-ensure-newline)
(shr-generic cont)
(shr-generic dom)
(shr-ensure-newline))
(defun shr-tag-dd (cont)
(defun shr-tag-dd (dom)
(shr-ensure-newline)
(let ((shr-indentation (+ shr-indentation 4)))
(shr-generic cont)))
(shr-generic dom)))
(defun shr-tag-ul (cont)
(defun shr-tag-ul (dom)
(shr-ensure-paragraph)
(let ((shr-list-mode 'ul))
(shr-generic cont))
(shr-generic dom))
(shr-ensure-paragraph))
(defun shr-tag-ol (cont)
(defun shr-tag-ol (dom)
(shr-ensure-paragraph)
(let ((shr-list-mode 1))
(shr-generic cont))
(shr-generic dom))
(shr-ensure-paragraph))
(defun shr-tag-li (cont)
(defun shr-tag-li (dom)
(shr-ensure-newline)
(shr-indent)
(let* ((bullet
@@ -1371,9 +1330,9 @@ The preference is a float determined from `shr-prefer-media-type'."
shr-bullet))
(shr-indentation (+ shr-indentation (length bullet))))
(insert bullet)
(shr-generic cont)))
(shr-generic dom)))
(defun shr-tag-br (cont)
(defun shr-tag-br (dom)
(when (and (not (bobp))
;; Only add a newline if we break the current line, or
;; the previous line isn't a blank line.
@@ -1382,42 +1341,42 @@ The preference is a float determined from `shr-prefer-media-type'."
(not (= (char-after (- (point) 2)) ?\n)))))
(insert "\n")
(shr-indent))
(shr-generic cont))
(shr-generic dom))
(defun shr-tag-span (cont)
(shr-generic cont))
(defun shr-tag-span (dom)
(shr-generic dom))
(defun shr-tag-h1 (cont)
(shr-heading cont 'bold 'underline))
(defun shr-tag-h1 (dom)
(shr-heading dom 'bold 'underline))
(defun shr-tag-h2 (cont)
(shr-heading cont 'bold))
(defun shr-tag-h2 (dom)
(shr-heading dom 'bold))
(defun shr-tag-h3 (cont)
(shr-heading cont 'italic))
(defun shr-tag-h3 (dom)
(shr-heading dom 'italic))
(defun shr-tag-h4 (cont)
(shr-heading cont))
(defun shr-tag-h4 (dom)
(shr-heading dom))
(defun shr-tag-h5 (cont)
(shr-heading cont))
(defun shr-tag-h5 (dom)
(shr-heading dom))
(defun shr-tag-h6 (cont)
(shr-heading cont))
(defun shr-tag-h6 (dom)
(shr-heading dom))
(defun shr-tag-hr (_cont)
(defun shr-tag-hr (_dom)
(shr-ensure-newline)
(insert (make-string shr-internal-width shr-hr-line) "\n"))
(defun shr-tag-title (cont)
(shr-heading cont 'bold 'underline))
(defun shr-tag-title (dom)
(shr-heading dom 'bold 'underline))
(defun shr-tag-font (cont)
(defun shr-tag-font (dom)
(let* ((start (point))
(color (cdr (assq :color cont)))
(color (dom-attr dom 'color))
(shr-stylesheet (nconc (list (cons 'color color))
shr-stylesheet)))
(shr-generic cont)
(shr-generic dom)
(when color
(shr-colorize-region start (point) color
(cdr (assq 'background-color shr-stylesheet))))))
@@ -1432,23 +1391,22 @@ The preference is a float determined from `shr-prefer-media-type'."
;; main buffer). Now we know how much space each TD really takes, so
;; we then render everything again with the new widths, and finally
;; insert all these boxes into the main buffer.
(defun shr-tag-table-1 (cont)
(setq cont (or (cdr (assq 'tbody cont))
cont))
(defun shr-tag-table-1 (dom)
(setq dom (or (dom-child-by-tag dom 'tbody) dom))
(let* ((shr-inhibit-images t)
(shr-table-depth (1+ shr-table-depth))
(shr-kinsoku-shorten t)
;; Find all suggested widths.
(columns (shr-column-specs cont))
(columns (shr-column-specs dom))
;; Compute how many characters wide each TD should be.
(suggested-widths (shr-pro-rate-columns columns))
;; Do a "test rendering" to see how big each TD is (this can
;; be smaller (if there's little text) or bigger (if there's
;; unbreakable text).
(sketch (shr-make-table cont suggested-widths))
(sketch (shr-make-table dom suggested-widths))
;; Compute the "natural" width by setting each column to 500
;; characters and see how wide they really render.
(natural (shr-make-table cont (make-vector (length columns) 500)))
(natural (shr-make-table dom (make-vector (length columns) 500)))
(sketch-widths (shr-table-widths sketch natural suggested-widths)))
;; This probably won't work very well.
(when (> (+ (loop for width across sketch-widths
@@ -1457,15 +1415,15 @@ The preference is a float determined from `shr-prefer-media-type'."
(frame-width))
(setq truncate-lines t))
;; Then render the table again with these new "hard" widths.
(shr-insert-table (shr-make-table cont sketch-widths t) sketch-widths)))
(shr-insert-table (shr-make-table dom sketch-widths t) sketch-widths)))
(defun shr-tag-table (cont)
(defun shr-tag-table (dom)
(shr-ensure-paragraph)
(let* ((caption (cdr (assq 'caption cont)))
(header (cdr (assq 'thead cont)))
(body (or (cdr (assq 'tbody cont)) cont))
(footer (cdr (assq 'tfoot cont)))
(bgcolor (cdr (assq :bgcolor cont)))
(let* ((caption (dom-child-by-tag dom 'caption))
(header (dom-child-by-tag dom 'thead))
(body (or (dom-child-by-tag dom 'tbody) dom))
(footer (dom-child-by-tag dom 'tfoot))
(bgcolor (dom-attr dom 'bgcolor))
(start (point))
(shr-stylesheet (nconc (list (cons 'background-color bgcolor))
shr-stylesheet))
@@ -1474,12 +1432,12 @@ The preference is a float determined from `shr-prefer-media-type'."
(nfooter (if footer (shr-max-columns footer))))
(if (and (not caption)
(not header)
(not (cdr (assq 'tbody cont)))
(not (cdr (assq 'tr cont)))
(not (dom-child-by-tag dom 'tbody))
(not (dom-child-by-tag dom 'tr))
(not footer))
;; The table is totally invalid and just contains random junk.
;; Try to output it anyway.
(shr-generic cont)
(shr-generic dom)
;; It's a real table, so render it.
(shr-tag-table-1
(nconc
@@ -1526,19 +1484,10 @@ The preference is a float determined from `shr-prefer-media-type'."
;; model isn't strong enough to allow us to put the images actually
;; into the tables.
(when (zerop shr-table-depth)
(dolist (elem (shr-find-elements cont 'object))
(shr-tag-object (cdr elem)))
(dolist (elem (shr-find-elements cont 'img))
(shr-tag-img (cdr elem))))))
(defun shr-find-elements (cont type)
(let (result)
(dolist (elem cont)
(cond ((eq (car elem) type)
(push elem result))
((consp (cdr elem))
(setq result (nconc (shr-find-elements (cdr elem) type) result)))))
(nreverse result)))
(dolist (elem (dom-by-tag dom 'object))
(shr-tag-object elem))
(dolist (elem (dom-by-tag dom 'img))
(shr-tag-img elem)))))
(defun shr-insert-table (table widths)
(let* ((collapse (equal (cdr (assq 'border-collapse shr-stylesheet))
@@ -1621,22 +1570,22 @@ The preference is a float determined from `shr-prefer-media-type'."
(aref widths i))))))))
widths))
(defun shr-make-table (cont widths &optional fill)
(or (cadr (assoc (list cont widths fill) shr-content-cache))
(let ((data (shr-make-table-1 cont widths fill)))
(push (list (list cont widths fill) data)
(defun shr-make-table (dom widths &optional fill)
(or (cadr (assoc (list dom widths fill) shr-content-cache))
(let ((data (shr-make-table-1 dom widths fill)))
(push (list (list dom widths fill) data)
shr-content-cache)
data)))
(defun shr-make-table-1 (cont widths &optional fill)
(defun shr-make-table-1 (dom widths &optional fill)
(let ((trs nil)
(shr-inhibit-decoration (not fill))
(rowspans (make-vector (length widths) 0))
width colspan)
(dolist (row cont)
(when (eq (car row) 'tr)
(dolist (row (dom-children dom))
(when (eq (dom-tag row) 'tr)
(let ((tds nil)
(columns (cdr row))
(columns (dom-children row))
(i 0)
(width-column 0)
column)
@@ -1650,12 +1599,12 @@ The preference is a float determined from `shr-prefer-media-type'."
(pop columns)
(aset rowspans i (1- (aref rowspans i)))
'(td)))
(when (or (memq (car column) '(td th))
(not column))
(when (cdr (assq :rowspan (cdr column)))
(when (and (not (stringp column))
(or (memq (dom-tag column) '(td th))
(not column)))
(when-let (span (dom-attr column 'rowspan))
(aset rowspans i (+ (aref rowspans i)
(1- (string-to-number
(cdr (assq :rowspan (cdr column))))))))
(1- (string-to-number span)))))
;; Sanity check for invalid column-spans.
(when (>= width-column (length widths))
(setq width-column 0))
@@ -1664,7 +1613,7 @@ The preference is a float determined from `shr-prefer-media-type'."
(aref widths width-column)
10))
(when (and fill
(setq colspan (cdr (assq :colspan (cdr column)))))
(setq colspan (dom-attr column colspan)))
(setq colspan (min (string-to-number colspan)
;; The colspan may be wrong, so
;; truncate it to the length of the
@@ -1679,18 +1628,18 @@ The preference is a float determined from `shr-prefer-media-type'."
(setq width-column (+ width-column (1- colspan))))
(when (or column
(not fill))
(push (shr-render-td (cdr column) width fill)
(push (shr-render-td column width fill)
tds))
(setq i (1+ i)
width-column (1+ width-column))))
(push (nreverse tds) trs))))
(nreverse trs)))
(defun shr-render-td (cont width fill)
(defun shr-render-td (dom width fill)
(with-temp-buffer
(let ((bgcolor (cdr (assq :bgcolor cont)))
(fgcolor (cdr (assq :fgcolor cont)))
(style (cdr (assq :style cont)))
(let ((bgcolor (dom-attr dom 'bgcolor))
(fgcolor (dom-attr dom 'fgcolor))
(style (dom-attr dom 'style))
(shr-stylesheet shr-stylesheet)
actual-colors)
(when style
@@ -1704,7 +1653,7 @@ The preference is a float determined from `shr-prefer-media-type'."
(setq shr-stylesheet (append style shr-stylesheet)))
(let ((shr-internal-width width)
(shr-indentation 0))
(shr-descend (cons 'td cont)))
(shr-descend dom))
;; Delete padding at the bottom of the TDs.
(delete-region
(point)
@@ -1725,7 +1674,7 @@ The preference is a float determined from `shr-prefer-media-type'."
(if (zerop (buffer-size))
(insert (make-string width ? ))
;; Otherwise, fill the buffer.
(let ((align (cdr (assq :align cont)))
(let ((align (dom-attr dom 'align))
length)
(while (not (eobp))
(end-of-line)
@@ -1780,14 +1729,15 @@ The preference is a float determined from `shr-prefer-media-type'."
widths))
;; Return a summary of the number and shape of the TDs in the table.
(defun shr-column-specs (cont)
(let ((columns (make-vector (shr-max-columns cont) 1)))
(dolist (row cont)
(when (eq (car row) 'tr)
(defun shr-column-specs (dom)
(let ((columns (make-vector (shr-max-columns dom) 1)))
(dolist (row (dom-children dom))
(when (eq (dom-tag row) 'tr)
(let ((i 0))
(dolist (column (cdr row))
(when (memq (car column) '(td th))
(let ((width (cdr (assq :width (cdr column)))))
(dolist (column (dom-children row))
(when (and (not (stringp column))
(memq (dom-tag column) '(td th)))
(let ((width (dom-attr column 'width)))
(when (and width
(string-match "\\([0-9]+\\)%" width)
(not (zerop (setq width (string-to-number
@@ -1796,19 +1746,20 @@ The preference is a float determined from `shr-prefer-media-type'."
(setq i (1+ i)))))))
columns))
(defun shr-count (cont elem)
(defun shr-count (dom elem)
(let ((i 0))
(dolist (sub cont)
(when (eq (car sub) elem)
(dolist (sub (dom-children dom))
(when (and (not (stringp sub))
(eq (dom-tag sub) elem))
(setq i (1+ i))))
i))
(defun shr-max-columns (cont)
(defun shr-max-columns (dom)
(let ((max 0))
(dolist (row cont)
(when (eq (car row) 'tr)
(setq max (max max (+ (shr-count (cdr row) 'td)
(shr-count (cdr row) 'th))))))
(dolist (row (dom-children dom))
(when (eq (dom-tag row) 'tr)
(setq max (max max (+ (shr-count row 'td)
(shr-count row 'th))))))
max))
(provide 'shr)