Simple web-scraper in Common Lisp (SBCL)
I have written a simple web-scraper in Common Lisp, & would greatly appreciate any feedback:
(defpackage :myfitnessdata
(:use :common-lisp)
(:export #:main))
(in-package :myfitnessdata)
(require :sb-posix)
(load (merge-pathnames "quicklisp/setup.lisp" (user-homedir-pathname)))
(ql:quickload '("drakma"
"closure-html"
"cxml-stp"
"net-telent-date"))
(defun show-usage ()
(format t "MyFitnessData - a CSV web scraper for the MyFitnessPal website.~%")
;; snip
(format t "'c:\Users\bob\weights.csv', overwriting it if it exists.~%"))
(defun login (username password)
"Logs in to www.myfitnesspal.com. Returns a cookie-jar containing authentication details."
(let ((cookie-jar (make-instance 'drakma:cookie-jar)))
(drakma:http-request "http://www.myfitnesspal.com/account/login"
:method :post
:parameters `(("username" . ,username) ("password" . ,password))
:cookie-jar cookie-jar)
cookie-jar))
(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(let ((logged-in? nil))
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
(if (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
(drakma:cookie-value cookie))
(setq logged-in? t)))
logged-in?))
(defun get-page (page-num cookie-jar)
"Downloads a potentially invalid HTML page containing data to scrape. Returns a string containing the HTML."
(let ((url (concatenate 'string "http://www.myfitnesspal.com/measurements/edit?type=1&page=" (write-to-string page-num))))
(let ((body (drakma:http-request url :cookie-jar cookie-jar)))
(if (search "No measurements found." body)
nil
body))))
(defun scrape-body (body)
"Scrapes data from a potentially invalid HTML document, returning a list of lists of values."
(let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
(let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
(scrape-xhtml xhtml-tree))))
(defun scrape-xhtml (xhtml-tree)
"Scrapes data from an XHTML tree, returning a list of lists of values."
(let ((results nil))
(stp:do-recursively (element xhtml-tree)
(when (and (typep element 'stp:element)
(equal (stp:local-name element) "tr"))
(if (scrape-row element)
(setq results (append results (list (scrape-row element)))))))
results))
(defun scrape-row (row)
"Scrapes data from a table row into a list of values."
(if (equal 4 (stp:number-of-children row))
(let ((measurement-type (nth-child-data 0 row))
(measurement-date (nth-child-data 1 row))
(measurement-value (nth-child-data 2 row)))
(if (not (equal measurement-type "Measurement"))
(list measurement-date measurement-value)))))
(defun nth-child-data (number row)
(stp:data (stp:nth-child 0 (stp:nth-child number row))))
(defun recursive-scrape-page (page-num cookie-jar)
"Recursively scrapes data from a page and all successive pages. Returns a list of lists of values."
(let ((body (get-page page-num cookie-jar)))
(if body
(append (scrape-body body)
(recursive-scrape-page (+ 1 page-num) cookie-jar)))))
(defun show-login-failure ()
(format t "Login failed.~%"))
(defun write-csv (data csv-pathname)
"Takes a list of lists of values, converts them to CSV, and writes them to a file."
(with-open-file (stream csv-pathname
:direction :output
:if-exists :overwrite
:if-does-not-exist :create)
(format stream (make-csv data))))
(defun separate-values (value-list)
"Takes a list of values, and returns a string containing a CSV row that represents the values."
(format nil "~{~A~^,~}" value-list))
(defun make-csv (list)
"Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
(let ((csv "")
(sorted-list (sort list #'first-column-as-date-ascending)))
(mapcar (lambda (row) (setq csv (concatenate 'string csv (separate-values row) (format nil "~%")))) sorted-list)
csv))
(defun first-column-as-date-ascending (first-row second-row)
"Compares two rows by their first column, which is parsed as a time."
(< (net.telent.date:parse-time (car first-row))
(net.telent.date:parse-time (car second-row))))
(defun scrape (username password csv-pathname)
"Attempts to log in, and if successful scrapes all data to the file specified by csv-pathname."
(let ((cookie-jar (login username password)))
(if (logged-in? cookie-jar)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
(show-login-failure))))
(defun main (args)
"The entry point for the application when compiled with buildapp."
(if (= (length args) 4)
(let ((username (nth 1 args))
(password (nth 2 args))
(csv-pathname (nth 3 args)))
(scrape username password csv-pathname))
(show-usage)))
There are several areas I'm unsure about, & would particularly appreciate feedback on:
- use of let & setq (I've got this wrong in the past)
- structure, naming & comments (as a Lisper, would you like to inherit this codebase?)
The entire app is here on GitHub if you're interested.
common-lisp
add a comment |
I have written a simple web-scraper in Common Lisp, & would greatly appreciate any feedback:
(defpackage :myfitnessdata
(:use :common-lisp)
(:export #:main))
(in-package :myfitnessdata)
(require :sb-posix)
(load (merge-pathnames "quicklisp/setup.lisp" (user-homedir-pathname)))
(ql:quickload '("drakma"
"closure-html"
"cxml-stp"
"net-telent-date"))
(defun show-usage ()
(format t "MyFitnessData - a CSV web scraper for the MyFitnessPal website.~%")
;; snip
(format t "'c:\Users\bob\weights.csv', overwriting it if it exists.~%"))
(defun login (username password)
"Logs in to www.myfitnesspal.com. Returns a cookie-jar containing authentication details."
(let ((cookie-jar (make-instance 'drakma:cookie-jar)))
(drakma:http-request "http://www.myfitnesspal.com/account/login"
:method :post
:parameters `(("username" . ,username) ("password" . ,password))
:cookie-jar cookie-jar)
cookie-jar))
(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(let ((logged-in? nil))
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
(if (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
(drakma:cookie-value cookie))
(setq logged-in? t)))
logged-in?))
(defun get-page (page-num cookie-jar)
"Downloads a potentially invalid HTML page containing data to scrape. Returns a string containing the HTML."
(let ((url (concatenate 'string "http://www.myfitnesspal.com/measurements/edit?type=1&page=" (write-to-string page-num))))
(let ((body (drakma:http-request url :cookie-jar cookie-jar)))
(if (search "No measurements found." body)
nil
body))))
(defun scrape-body (body)
"Scrapes data from a potentially invalid HTML document, returning a list of lists of values."
(let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
(let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
(scrape-xhtml xhtml-tree))))
(defun scrape-xhtml (xhtml-tree)
"Scrapes data from an XHTML tree, returning a list of lists of values."
(let ((results nil))
(stp:do-recursively (element xhtml-tree)
(when (and (typep element 'stp:element)
(equal (stp:local-name element) "tr"))
(if (scrape-row element)
(setq results (append results (list (scrape-row element)))))))
results))
(defun scrape-row (row)
"Scrapes data from a table row into a list of values."
(if (equal 4 (stp:number-of-children row))
(let ((measurement-type (nth-child-data 0 row))
(measurement-date (nth-child-data 1 row))
(measurement-value (nth-child-data 2 row)))
(if (not (equal measurement-type "Measurement"))
(list measurement-date measurement-value)))))
(defun nth-child-data (number row)
(stp:data (stp:nth-child 0 (stp:nth-child number row))))
(defun recursive-scrape-page (page-num cookie-jar)
"Recursively scrapes data from a page and all successive pages. Returns a list of lists of values."
(let ((body (get-page page-num cookie-jar)))
(if body
(append (scrape-body body)
(recursive-scrape-page (+ 1 page-num) cookie-jar)))))
(defun show-login-failure ()
(format t "Login failed.~%"))
(defun write-csv (data csv-pathname)
"Takes a list of lists of values, converts them to CSV, and writes them to a file."
(with-open-file (stream csv-pathname
:direction :output
:if-exists :overwrite
:if-does-not-exist :create)
(format stream (make-csv data))))
(defun separate-values (value-list)
"Takes a list of values, and returns a string containing a CSV row that represents the values."
(format nil "~{~A~^,~}" value-list))
(defun make-csv (list)
"Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
(let ((csv "")
(sorted-list (sort list #'first-column-as-date-ascending)))
(mapcar (lambda (row) (setq csv (concatenate 'string csv (separate-values row) (format nil "~%")))) sorted-list)
csv))
(defun first-column-as-date-ascending (first-row second-row)
"Compares two rows by their first column, which is parsed as a time."
(< (net.telent.date:parse-time (car first-row))
(net.telent.date:parse-time (car second-row))))
(defun scrape (username password csv-pathname)
"Attempts to log in, and if successful scrapes all data to the file specified by csv-pathname."
(let ((cookie-jar (login username password)))
(if (logged-in? cookie-jar)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
(show-login-failure))))
(defun main (args)
"The entry point for the application when compiled with buildapp."
(if (= (length args) 4)
(let ((username (nth 1 args))
(password (nth 2 args))
(csv-pathname (nth 3 args)))
(scrape username password csv-pathname))
(show-usage)))
There are several areas I'm unsure about, & would particularly appreciate feedback on:
- use of let & setq (I've got this wrong in the past)
- structure, naming & comments (as a Lisper, would you like to inherit this codebase?)
The entire app is here on GitHub if you're interested.
common-lisp
I always feel interested in every CL projects . Hey thank to your codebase, now I know how to use defpackage :D
– Dark Cloud
May 6 '11 at 16:19
add a comment |
I have written a simple web-scraper in Common Lisp, & would greatly appreciate any feedback:
(defpackage :myfitnessdata
(:use :common-lisp)
(:export #:main))
(in-package :myfitnessdata)
(require :sb-posix)
(load (merge-pathnames "quicklisp/setup.lisp" (user-homedir-pathname)))
(ql:quickload '("drakma"
"closure-html"
"cxml-stp"
"net-telent-date"))
(defun show-usage ()
(format t "MyFitnessData - a CSV web scraper for the MyFitnessPal website.~%")
;; snip
(format t "'c:\Users\bob\weights.csv', overwriting it if it exists.~%"))
(defun login (username password)
"Logs in to www.myfitnesspal.com. Returns a cookie-jar containing authentication details."
(let ((cookie-jar (make-instance 'drakma:cookie-jar)))
(drakma:http-request "http://www.myfitnesspal.com/account/login"
:method :post
:parameters `(("username" . ,username) ("password" . ,password))
:cookie-jar cookie-jar)
cookie-jar))
(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(let ((logged-in? nil))
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
(if (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
(drakma:cookie-value cookie))
(setq logged-in? t)))
logged-in?))
(defun get-page (page-num cookie-jar)
"Downloads a potentially invalid HTML page containing data to scrape. Returns a string containing the HTML."
(let ((url (concatenate 'string "http://www.myfitnesspal.com/measurements/edit?type=1&page=" (write-to-string page-num))))
(let ((body (drakma:http-request url :cookie-jar cookie-jar)))
(if (search "No measurements found." body)
nil
body))))
(defun scrape-body (body)
"Scrapes data from a potentially invalid HTML document, returning a list of lists of values."
(let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
(let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
(scrape-xhtml xhtml-tree))))
(defun scrape-xhtml (xhtml-tree)
"Scrapes data from an XHTML tree, returning a list of lists of values."
(let ((results nil))
(stp:do-recursively (element xhtml-tree)
(when (and (typep element 'stp:element)
(equal (stp:local-name element) "tr"))
(if (scrape-row element)
(setq results (append results (list (scrape-row element)))))))
results))
(defun scrape-row (row)
"Scrapes data from a table row into a list of values."
(if (equal 4 (stp:number-of-children row))
(let ((measurement-type (nth-child-data 0 row))
(measurement-date (nth-child-data 1 row))
(measurement-value (nth-child-data 2 row)))
(if (not (equal measurement-type "Measurement"))
(list measurement-date measurement-value)))))
(defun nth-child-data (number row)
(stp:data (stp:nth-child 0 (stp:nth-child number row))))
(defun recursive-scrape-page (page-num cookie-jar)
"Recursively scrapes data from a page and all successive pages. Returns a list of lists of values."
(let ((body (get-page page-num cookie-jar)))
(if body
(append (scrape-body body)
(recursive-scrape-page (+ 1 page-num) cookie-jar)))))
(defun show-login-failure ()
(format t "Login failed.~%"))
(defun write-csv (data csv-pathname)
"Takes a list of lists of values, converts them to CSV, and writes them to a file."
(with-open-file (stream csv-pathname
:direction :output
:if-exists :overwrite
:if-does-not-exist :create)
(format stream (make-csv data))))
(defun separate-values (value-list)
"Takes a list of values, and returns a string containing a CSV row that represents the values."
(format nil "~{~A~^,~}" value-list))
(defun make-csv (list)
"Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
(let ((csv "")
(sorted-list (sort list #'first-column-as-date-ascending)))
(mapcar (lambda (row) (setq csv (concatenate 'string csv (separate-values row) (format nil "~%")))) sorted-list)
csv))
(defun first-column-as-date-ascending (first-row second-row)
"Compares two rows by their first column, which is parsed as a time."
(< (net.telent.date:parse-time (car first-row))
(net.telent.date:parse-time (car second-row))))
(defun scrape (username password csv-pathname)
"Attempts to log in, and if successful scrapes all data to the file specified by csv-pathname."
(let ((cookie-jar (login username password)))
(if (logged-in? cookie-jar)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
(show-login-failure))))
(defun main (args)
"The entry point for the application when compiled with buildapp."
(if (= (length args) 4)
(let ((username (nth 1 args))
(password (nth 2 args))
(csv-pathname (nth 3 args)))
(scrape username password csv-pathname))
(show-usage)))
There are several areas I'm unsure about, & would particularly appreciate feedback on:
- use of let & setq (I've got this wrong in the past)
- structure, naming & comments (as a Lisper, would you like to inherit this codebase?)
The entire app is here on GitHub if you're interested.
common-lisp
I have written a simple web-scraper in Common Lisp, & would greatly appreciate any feedback:
(defpackage :myfitnessdata
(:use :common-lisp)
(:export #:main))
(in-package :myfitnessdata)
(require :sb-posix)
(load (merge-pathnames "quicklisp/setup.lisp" (user-homedir-pathname)))
(ql:quickload '("drakma"
"closure-html"
"cxml-stp"
"net-telent-date"))
(defun show-usage ()
(format t "MyFitnessData - a CSV web scraper for the MyFitnessPal website.~%")
;; snip
(format t "'c:\Users\bob\weights.csv', overwriting it if it exists.~%"))
(defun login (username password)
"Logs in to www.myfitnesspal.com. Returns a cookie-jar containing authentication details."
(let ((cookie-jar (make-instance 'drakma:cookie-jar)))
(drakma:http-request "http://www.myfitnesspal.com/account/login"
:method :post
:parameters `(("username" . ,username) ("password" . ,password))
:cookie-jar cookie-jar)
cookie-jar))
(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(let ((logged-in? nil))
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
(if (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
(drakma:cookie-value cookie))
(setq logged-in? t)))
logged-in?))
(defun get-page (page-num cookie-jar)
"Downloads a potentially invalid HTML page containing data to scrape. Returns a string containing the HTML."
(let ((url (concatenate 'string "http://www.myfitnesspal.com/measurements/edit?type=1&page=" (write-to-string page-num))))
(let ((body (drakma:http-request url :cookie-jar cookie-jar)))
(if (search "No measurements found." body)
nil
body))))
(defun scrape-body (body)
"Scrapes data from a potentially invalid HTML document, returning a list of lists of values."
(let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
(let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
(scrape-xhtml xhtml-tree))))
(defun scrape-xhtml (xhtml-tree)
"Scrapes data from an XHTML tree, returning a list of lists of values."
(let ((results nil))
(stp:do-recursively (element xhtml-tree)
(when (and (typep element 'stp:element)
(equal (stp:local-name element) "tr"))
(if (scrape-row element)
(setq results (append results (list (scrape-row element)))))))
results))
(defun scrape-row (row)
"Scrapes data from a table row into a list of values."
(if (equal 4 (stp:number-of-children row))
(let ((measurement-type (nth-child-data 0 row))
(measurement-date (nth-child-data 1 row))
(measurement-value (nth-child-data 2 row)))
(if (not (equal measurement-type "Measurement"))
(list measurement-date measurement-value)))))
(defun nth-child-data (number row)
(stp:data (stp:nth-child 0 (stp:nth-child number row))))
(defun recursive-scrape-page (page-num cookie-jar)
"Recursively scrapes data from a page and all successive pages. Returns a list of lists of values."
(let ((body (get-page page-num cookie-jar)))
(if body
(append (scrape-body body)
(recursive-scrape-page (+ 1 page-num) cookie-jar)))))
(defun show-login-failure ()
(format t "Login failed.~%"))
(defun write-csv (data csv-pathname)
"Takes a list of lists of values, converts them to CSV, and writes them to a file."
(with-open-file (stream csv-pathname
:direction :output
:if-exists :overwrite
:if-does-not-exist :create)
(format stream (make-csv data))))
(defun separate-values (value-list)
"Takes a list of values, and returns a string containing a CSV row that represents the values."
(format nil "~{~A~^,~}" value-list))
(defun make-csv (list)
"Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
(let ((csv "")
(sorted-list (sort list #'first-column-as-date-ascending)))
(mapcar (lambda (row) (setq csv (concatenate 'string csv (separate-values row) (format nil "~%")))) sorted-list)
csv))
(defun first-column-as-date-ascending (first-row second-row)
"Compares two rows by their first column, which is parsed as a time."
(< (net.telent.date:parse-time (car first-row))
(net.telent.date:parse-time (car second-row))))
(defun scrape (username password csv-pathname)
"Attempts to log in, and if successful scrapes all data to the file specified by csv-pathname."
(let ((cookie-jar (login username password)))
(if (logged-in? cookie-jar)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
(show-login-failure))))
(defun main (args)
"The entry point for the application when compiled with buildapp."
(if (= (length args) 4)
(let ((username (nth 1 args))
(password (nth 2 args))
(csv-pathname (nth 3 args)))
(scrape username password csv-pathname))
(show-usage)))
There are several areas I'm unsure about, & would particularly appreciate feedback on:
- use of let & setq (I've got this wrong in the past)
- structure, naming & comments (as a Lisper, would you like to inherit this codebase?)
The entire app is here on GitHub if you're interested.
common-lisp
common-lisp
edited Apr 13 '17 at 12:40
Community♦
1
1
asked May 6 '11 at 9:33
Duncan BayneDuncan Bayne
280111
280111
I always feel interested in every CL projects . Hey thank to your codebase, now I know how to use defpackage :D
– Dark Cloud
May 6 '11 at 16:19
add a comment |
I always feel interested in every CL projects . Hey thank to your codebase, now I know how to use defpackage :D
– Dark Cloud
May 6 '11 at 16:19
I always feel interested in every CL projects . Hey thank to your codebase, now I know how to use defpackage :D
– Dark Cloud
May 6 '11 at 16:19
I always feel interested in every CL projects . Hey thank to your codebase, now I know how to use defpackage :D
– Dark Cloud
May 6 '11 at 16:19
add a comment |
2 Answers
2
active
oldest
votes
You might want to take a look at defining asdf
systems instead of using quicklisp
to load dependencies internally.
The standard way of doing this is to set up an asd
file. Here's a decent walk-through of that process. It's more verbose than ql:quickload
, but it lets people who don't have quicklisp use your package regardless.
On second thought, screw those guys, keep it up.
(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(let ((logged-in? nil))
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
(if (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
(drakma:cookie-value cookie))
(setq logged-in? t)))
logged-in?))
There's actually a loop
shorthand for "make sure each member of list
satisfies predicate
". The above function can be written as
(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar)
always (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com"))))
foo?
is the Scheme convention for predicates. The common CL conventions are foop
or foo-p
. Personally, I prefer foo?
too, just be aware that it's not standard.
...
(sorted-list (sort list #'first-column-as-date-ascending)))
...
This can get you into trouble. The Common Lisp sort
should really be named sort!
, because it's destructive (so sorted-list
will now contain a sorted list, but list
won't still be the unsorted list, and isn't guaranteed to be the complete sequence anymore). If you might use list
again later, instead do
...
(sorted-list (sort (copy-list list) #'first-column-as-date-ascending)))
...
(if (search "No measurements found." body)
nil
body)
Can be written as
(unless (search "No measurements found." body) body)
EDIT:
format
can accept nested iterations in the directive, so you can eliminate separate-values
by writing make-csv
as
(defun make-csv (list)
"Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
(let ((sorted-list (sort list #'first-column-as-date-ascending)))
(format nil "~{~{~A~^,~}~^~%~}" sorted-list)))
You could eliminate make-csv
entirely by putting the above sort+directive directly into write-csv
(this would also save you a trip through the CSV string, which may or may not make a significant difference).
recursive-scrape-page
can be simplified down to
(defun scrape-page (page-num cookie-jar)
(loop for i from page-num
if (get-page i cookie-jar) collect it into pg
else return pg))
As a rule, Common Lisp doesn't guarantee tail-calls the way Scheme does, so it's generally a better idea to use a loop
than raw recursion. SBCL does support some tail calls, but it isn't guaranteed (though this situation looks simple enough that it just might; do some profiling and compare).
You should be able to simplify scrape-xhtml
in a similar way to eliminate (let ((results nil))
.
Note that I haven't tested or profiled any of this since I don't have a "MyFitnessPal" account. Check that it works first.
EDIT the Second:
...
(let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
(let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
...
You use this nested let idiom in a couple of places. I assume this is just because the value of xhtml-tree
depends on the value of valid-html
. In this case, you can instead write
...
(let* ((valid-xhtml (chtml:parse body (cxml:make-string-sink)))
(xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
...
add a comment |
Be careful to indent your code properly to make it more readable. For instance:
(let ((cookie-jar (login username password)))
(if (logged-in? cookie-jar)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
(show-login-failure)))
That piece of code could by the way possibly be improved by an idiomatic WITH-VALID-LOGIN
macro (if you want to practice). It could become...
(with-valid-login (cookie-jar username password)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname))
...with a macro definition like:
(defmacro with-valid-login ((jar user password) &body body)
`(let ((,jar (login ,user ,password)))
(if (logged-in? ,jar)
(progn ,@body)
(show-login-failure))))
add a comment |
Your Answer
StackExchange.ifUsing("editor", function () {
return StackExchange.using("mathjaxEditing", function () {
StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
});
});
}, "mathjax-editing");
StackExchange.ifUsing("editor", function () {
StackExchange.using("externalEditor", function () {
StackExchange.using("snippets", function () {
StackExchange.snippets.init();
});
});
}, "code-snippets");
StackExchange.ready(function() {
var channelOptions = {
tags: "".split(" "),
id: "196"
};
initTagRenderer("".split(" "), "".split(" "), channelOptions);
StackExchange.using("externalEditor", function() {
// Have to fire editor after snippets, if snippets enabled
if (StackExchange.settings.snippets.snippetsEnabled) {
StackExchange.using("snippets", function() {
createEditor();
});
}
else {
createEditor();
}
});
function createEditor() {
StackExchange.prepareEditor({
heartbeatType: 'answer',
autoActivateHeartbeat: false,
convertImagesToLinks: false,
noModals: true,
showLowRepImageUploadWarning: true,
reputationToPostImages: null,
bindNavPrevention: true,
postfix: "",
imageUploader: {
brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
allowUrls: true
},
onDemand: true,
discardSelector: ".discard-answer"
,immediatelyShowMarkdownHelp:true
});
}
});
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f2277%2fsimple-web-scraper-in-common-lisp-sbcl%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
2 Answers
2
active
oldest
votes
2 Answers
2
active
oldest
votes
active
oldest
votes
active
oldest
votes
You might want to take a look at defining asdf
systems instead of using quicklisp
to load dependencies internally.
The standard way of doing this is to set up an asd
file. Here's a decent walk-through of that process. It's more verbose than ql:quickload
, but it lets people who don't have quicklisp use your package regardless.
On second thought, screw those guys, keep it up.
(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(let ((logged-in? nil))
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
(if (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
(drakma:cookie-value cookie))
(setq logged-in? t)))
logged-in?))
There's actually a loop
shorthand for "make sure each member of list
satisfies predicate
". The above function can be written as
(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar)
always (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com"))))
foo?
is the Scheme convention for predicates. The common CL conventions are foop
or foo-p
. Personally, I prefer foo?
too, just be aware that it's not standard.
...
(sorted-list (sort list #'first-column-as-date-ascending)))
...
This can get you into trouble. The Common Lisp sort
should really be named sort!
, because it's destructive (so sorted-list
will now contain a sorted list, but list
won't still be the unsorted list, and isn't guaranteed to be the complete sequence anymore). If you might use list
again later, instead do
...
(sorted-list (sort (copy-list list) #'first-column-as-date-ascending)))
...
(if (search "No measurements found." body)
nil
body)
Can be written as
(unless (search "No measurements found." body) body)
EDIT:
format
can accept nested iterations in the directive, so you can eliminate separate-values
by writing make-csv
as
(defun make-csv (list)
"Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
(let ((sorted-list (sort list #'first-column-as-date-ascending)))
(format nil "~{~{~A~^,~}~^~%~}" sorted-list)))
You could eliminate make-csv
entirely by putting the above sort+directive directly into write-csv
(this would also save you a trip through the CSV string, which may or may not make a significant difference).
recursive-scrape-page
can be simplified down to
(defun scrape-page (page-num cookie-jar)
(loop for i from page-num
if (get-page i cookie-jar) collect it into pg
else return pg))
As a rule, Common Lisp doesn't guarantee tail-calls the way Scheme does, so it's generally a better idea to use a loop
than raw recursion. SBCL does support some tail calls, but it isn't guaranteed (though this situation looks simple enough that it just might; do some profiling and compare).
You should be able to simplify scrape-xhtml
in a similar way to eliminate (let ((results nil))
.
Note that I haven't tested or profiled any of this since I don't have a "MyFitnessPal" account. Check that it works first.
EDIT the Second:
...
(let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
(let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
...
You use this nested let idiom in a couple of places. I assume this is just because the value of xhtml-tree
depends on the value of valid-html
. In this case, you can instead write
...
(let* ((valid-xhtml (chtml:parse body (cxml:make-string-sink)))
(xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
...
add a comment |
You might want to take a look at defining asdf
systems instead of using quicklisp
to load dependencies internally.
The standard way of doing this is to set up an asd
file. Here's a decent walk-through of that process. It's more verbose than ql:quickload
, but it lets people who don't have quicklisp use your package regardless.
On second thought, screw those guys, keep it up.
(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(let ((logged-in? nil))
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
(if (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
(drakma:cookie-value cookie))
(setq logged-in? t)))
logged-in?))
There's actually a loop
shorthand for "make sure each member of list
satisfies predicate
". The above function can be written as
(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar)
always (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com"))))
foo?
is the Scheme convention for predicates. The common CL conventions are foop
or foo-p
. Personally, I prefer foo?
too, just be aware that it's not standard.
...
(sorted-list (sort list #'first-column-as-date-ascending)))
...
This can get you into trouble. The Common Lisp sort
should really be named sort!
, because it's destructive (so sorted-list
will now contain a sorted list, but list
won't still be the unsorted list, and isn't guaranteed to be the complete sequence anymore). If you might use list
again later, instead do
...
(sorted-list (sort (copy-list list) #'first-column-as-date-ascending)))
...
(if (search "No measurements found." body)
nil
body)
Can be written as
(unless (search "No measurements found." body) body)
EDIT:
format
can accept nested iterations in the directive, so you can eliminate separate-values
by writing make-csv
as
(defun make-csv (list)
"Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
(let ((sorted-list (sort list #'first-column-as-date-ascending)))
(format nil "~{~{~A~^,~}~^~%~}" sorted-list)))
You could eliminate make-csv
entirely by putting the above sort+directive directly into write-csv
(this would also save you a trip through the CSV string, which may or may not make a significant difference).
recursive-scrape-page
can be simplified down to
(defun scrape-page (page-num cookie-jar)
(loop for i from page-num
if (get-page i cookie-jar) collect it into pg
else return pg))
As a rule, Common Lisp doesn't guarantee tail-calls the way Scheme does, so it's generally a better idea to use a loop
than raw recursion. SBCL does support some tail calls, but it isn't guaranteed (though this situation looks simple enough that it just might; do some profiling and compare).
You should be able to simplify scrape-xhtml
in a similar way to eliminate (let ((results nil))
.
Note that I haven't tested or profiled any of this since I don't have a "MyFitnessPal" account. Check that it works first.
EDIT the Second:
...
(let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
(let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
...
You use this nested let idiom in a couple of places. I assume this is just because the value of xhtml-tree
depends on the value of valid-html
. In this case, you can instead write
...
(let* ((valid-xhtml (chtml:parse body (cxml:make-string-sink)))
(xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
...
add a comment |
You might want to take a look at defining asdf
systems instead of using quicklisp
to load dependencies internally.
The standard way of doing this is to set up an asd
file. Here's a decent walk-through of that process. It's more verbose than ql:quickload
, but it lets people who don't have quicklisp use your package regardless.
On second thought, screw those guys, keep it up.
(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(let ((logged-in? nil))
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
(if (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
(drakma:cookie-value cookie))
(setq logged-in? t)))
logged-in?))
There's actually a loop
shorthand for "make sure each member of list
satisfies predicate
". The above function can be written as
(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar)
always (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com"))))
foo?
is the Scheme convention for predicates. The common CL conventions are foop
or foo-p
. Personally, I prefer foo?
too, just be aware that it's not standard.
...
(sorted-list (sort list #'first-column-as-date-ascending)))
...
This can get you into trouble. The Common Lisp sort
should really be named sort!
, because it's destructive (so sorted-list
will now contain a sorted list, but list
won't still be the unsorted list, and isn't guaranteed to be the complete sequence anymore). If you might use list
again later, instead do
...
(sorted-list (sort (copy-list list) #'first-column-as-date-ascending)))
...
(if (search "No measurements found." body)
nil
body)
Can be written as
(unless (search "No measurements found." body) body)
EDIT:
format
can accept nested iterations in the directive, so you can eliminate separate-values
by writing make-csv
as
(defun make-csv (list)
"Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
(let ((sorted-list (sort list #'first-column-as-date-ascending)))
(format nil "~{~{~A~^,~}~^~%~}" sorted-list)))
You could eliminate make-csv
entirely by putting the above sort+directive directly into write-csv
(this would also save you a trip through the CSV string, which may or may not make a significant difference).
recursive-scrape-page
can be simplified down to
(defun scrape-page (page-num cookie-jar)
(loop for i from page-num
if (get-page i cookie-jar) collect it into pg
else return pg))
As a rule, Common Lisp doesn't guarantee tail-calls the way Scheme does, so it's generally a better idea to use a loop
than raw recursion. SBCL does support some tail calls, but it isn't guaranteed (though this situation looks simple enough that it just might; do some profiling and compare).
You should be able to simplify scrape-xhtml
in a similar way to eliminate (let ((results nil))
.
Note that I haven't tested or profiled any of this since I don't have a "MyFitnessPal" account. Check that it works first.
EDIT the Second:
...
(let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
(let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
...
You use this nested let idiom in a couple of places. I assume this is just because the value of xhtml-tree
depends on the value of valid-html
. In this case, you can instead write
...
(let* ((valid-xhtml (chtml:parse body (cxml:make-string-sink)))
(xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
...
You might want to take a look at defining asdf
systems instead of using quicklisp
to load dependencies internally.
The standard way of doing this is to set up an asd
file. Here's a decent walk-through of that process. It's more verbose than ql:quickload
, but it lets people who don't have quicklisp use your package regardless.
On second thought, screw those guys, keep it up.
(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(let ((logged-in? nil))
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
(if (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
(drakma:cookie-value cookie))
(setq logged-in? t)))
logged-in?))
There's actually a loop
shorthand for "make sure each member of list
satisfies predicate
". The above function can be written as
(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar)
always (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com"))))
foo?
is the Scheme convention for predicates. The common CL conventions are foop
or foo-p
. Personally, I prefer foo?
too, just be aware that it's not standard.
...
(sorted-list (sort list #'first-column-as-date-ascending)))
...
This can get you into trouble. The Common Lisp sort
should really be named sort!
, because it's destructive (so sorted-list
will now contain a sorted list, but list
won't still be the unsorted list, and isn't guaranteed to be the complete sequence anymore). If you might use list
again later, instead do
...
(sorted-list (sort (copy-list list) #'first-column-as-date-ascending)))
...
(if (search "No measurements found." body)
nil
body)
Can be written as
(unless (search "No measurements found." body) body)
EDIT:
format
can accept nested iterations in the directive, so you can eliminate separate-values
by writing make-csv
as
(defun make-csv (list)
"Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
(let ((sorted-list (sort list #'first-column-as-date-ascending)))
(format nil "~{~{~A~^,~}~^~%~}" sorted-list)))
You could eliminate make-csv
entirely by putting the above sort+directive directly into write-csv
(this would also save you a trip through the CSV string, which may or may not make a significant difference).
recursive-scrape-page
can be simplified down to
(defun scrape-page (page-num cookie-jar)
(loop for i from page-num
if (get-page i cookie-jar) collect it into pg
else return pg))
As a rule, Common Lisp doesn't guarantee tail-calls the way Scheme does, so it's generally a better idea to use a loop
than raw recursion. SBCL does support some tail calls, but it isn't guaranteed (though this situation looks simple enough that it just might; do some profiling and compare).
You should be able to simplify scrape-xhtml
in a similar way to eliminate (let ((results nil))
.
Note that I haven't tested or profiled any of this since I don't have a "MyFitnessPal" account. Check that it works first.
EDIT the Second:
...
(let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
(let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
...
You use this nested let idiom in a couple of places. I assume this is just because the value of xhtml-tree
depends on the value of valid-html
. In this case, you can instead write
...
(let* ((valid-xhtml (chtml:parse body (cxml:make-string-sink)))
(xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
...
edited May 7 '11 at 5:07
answered May 6 '11 at 19:00
InaimathiInaimathi
1,693821
1,693821
add a comment |
add a comment |
Be careful to indent your code properly to make it more readable. For instance:
(let ((cookie-jar (login username password)))
(if (logged-in? cookie-jar)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
(show-login-failure)))
That piece of code could by the way possibly be improved by an idiomatic WITH-VALID-LOGIN
macro (if you want to practice). It could become...
(with-valid-login (cookie-jar username password)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname))
...with a macro definition like:
(defmacro with-valid-login ((jar user password) &body body)
`(let ((,jar (login ,user ,password)))
(if (logged-in? ,jar)
(progn ,@body)
(show-login-failure))))
add a comment |
Be careful to indent your code properly to make it more readable. For instance:
(let ((cookie-jar (login username password)))
(if (logged-in? cookie-jar)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
(show-login-failure)))
That piece of code could by the way possibly be improved by an idiomatic WITH-VALID-LOGIN
macro (if you want to practice). It could become...
(with-valid-login (cookie-jar username password)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname))
...with a macro definition like:
(defmacro with-valid-login ((jar user password) &body body)
`(let ((,jar (login ,user ,password)))
(if (logged-in? ,jar)
(progn ,@body)
(show-login-failure))))
add a comment |
Be careful to indent your code properly to make it more readable. For instance:
(let ((cookie-jar (login username password)))
(if (logged-in? cookie-jar)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
(show-login-failure)))
That piece of code could by the way possibly be improved by an idiomatic WITH-VALID-LOGIN
macro (if you want to practice). It could become...
(with-valid-login (cookie-jar username password)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname))
...with a macro definition like:
(defmacro with-valid-login ((jar user password) &body body)
`(let ((,jar (login ,user ,password)))
(if (logged-in? ,jar)
(progn ,@body)
(show-login-failure))))
Be careful to indent your code properly to make it more readable. For instance:
(let ((cookie-jar (login username password)))
(if (logged-in? cookie-jar)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
(show-login-failure)))
That piece of code could by the way possibly be improved by an idiomatic WITH-VALID-LOGIN
macro (if you want to practice). It could become...
(with-valid-login (cookie-jar username password)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname))
...with a macro definition like:
(defmacro with-valid-login ((jar user password) &body body)
`(let ((,jar (login ,user ,password)))
(if (logged-in? ,jar)
(progn ,@body)
(show-login-failure))))
edited 2 days ago
devon
1034
1034
answered Jan 12 '16 at 7:41
TorbjørnTorbjørn
362111
362111
add a comment |
add a comment |
Thanks for contributing an answer to Code Review Stack Exchange!
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
Use MathJax to format equations. MathJax reference.
To learn more, see our tips on writing great answers.
Some of your past answers have not been well-received, and you're in danger of being blocked from answering.
Please pay close attention to the following guidance:
- Please be sure to answer the question. Provide details and share your research!
But avoid …
- Asking for help, clarification, or responding to other answers.
- Making statements based on opinion; back them up with references or personal experience.
To learn more, see our tips on writing great answers.
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f2277%2fsimple-web-scraper-in-common-lisp-sbcl%23new-answer', 'question_page');
}
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
I always feel interested in every CL projects . Hey thank to your codebase, now I know how to use defpackage :D
– Dark Cloud
May 6 '11 at 16:19