Simple web-scraper in Common Lisp (SBCL)












6














I have written a simple web-scraper in Common Lisp, & would greatly appreciate any feedback:



(defpackage :myfitnessdata
(:use :common-lisp)
(:export #:main))

(in-package :myfitnessdata)

(require :sb-posix)
(load (merge-pathnames "quicklisp/setup.lisp" (user-homedir-pathname)))
(ql:quickload '("drakma"
"closure-html"
"cxml-stp"
"net-telent-date"))

(defun show-usage ()
(format t "MyFitnessData - a CSV web scraper for the MyFitnessPal website.~%")
;; snip
(format t "'c:\Users\bob\weights.csv', overwriting it if it exists.~%"))

(defun login (username password)
"Logs in to www.myfitnesspal.com. Returns a cookie-jar containing authentication details."
(let ((cookie-jar (make-instance 'drakma:cookie-jar)))
(drakma:http-request "http://www.myfitnesspal.com/account/login"
:method :post
:parameters `(("username" . ,username) ("password" . ,password))
:cookie-jar cookie-jar)
cookie-jar))

(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(let ((logged-in? nil))
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
(if (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
(drakma:cookie-value cookie))
(setq logged-in? t)))
logged-in?))

(defun get-page (page-num cookie-jar)
"Downloads a potentially invalid HTML page containing data to scrape. Returns a string containing the HTML."
(let ((url (concatenate 'string "http://www.myfitnesspal.com/measurements/edit?type=1&page=" (write-to-string page-num))))
(let ((body (drakma:http-request url :cookie-jar cookie-jar)))
(if (search "No measurements found." body)
nil
body))))

(defun scrape-body (body)
"Scrapes data from a potentially invalid HTML document, returning a list of lists of values."
(let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
(let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
(scrape-xhtml xhtml-tree))))

(defun scrape-xhtml (xhtml-tree)
"Scrapes data from an XHTML tree, returning a list of lists of values."
(let ((results nil))
(stp:do-recursively (element xhtml-tree)
(when (and (typep element 'stp:element)
(equal (stp:local-name element) "tr"))
(if (scrape-row element)
(setq results (append results (list (scrape-row element)))))))
results))

(defun scrape-row (row)
"Scrapes data from a table row into a list of values."
(if (equal 4 (stp:number-of-children row))
(let ((measurement-type (nth-child-data 0 row))
(measurement-date (nth-child-data 1 row))
(measurement-value (nth-child-data 2 row)))
(if (not (equal measurement-type "Measurement"))
(list measurement-date measurement-value)))))

(defun nth-child-data (number row)
(stp:data (stp:nth-child 0 (stp:nth-child number row))))

(defun recursive-scrape-page (page-num cookie-jar)
"Recursively scrapes data from a page and all successive pages. Returns a list of lists of values."
(let ((body (get-page page-num cookie-jar)))
(if body
(append (scrape-body body)
(recursive-scrape-page (+ 1 page-num) cookie-jar)))))

(defun show-login-failure ()
(format t "Login failed.~%"))

(defun write-csv (data csv-pathname)
"Takes a list of lists of values, converts them to CSV, and writes them to a file."
(with-open-file (stream csv-pathname
:direction :output
:if-exists :overwrite
:if-does-not-exist :create)
(format stream (make-csv data))))

(defun separate-values (value-list)
"Takes a list of values, and returns a string containing a CSV row that represents the values."
(format nil "~{~A~^,~}" value-list))

(defun make-csv (list)
"Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
(let ((csv "")
(sorted-list (sort list #'first-column-as-date-ascending)))
(mapcar (lambda (row) (setq csv (concatenate 'string csv (separate-values row) (format nil "~%")))) sorted-list)
csv))

(defun first-column-as-date-ascending (first-row second-row)
"Compares two rows by their first column, which is parsed as a time."
(< (net.telent.date:parse-time (car first-row))
(net.telent.date:parse-time (car second-row))))

(defun scrape (username password csv-pathname)
"Attempts to log in, and if successful scrapes all data to the file specified by csv-pathname."
(let ((cookie-jar (login username password)))
(if (logged-in? cookie-jar)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
(show-login-failure))))

(defun main (args)
"The entry point for the application when compiled with buildapp."
(if (= (length args) 4)
(let ((username (nth 1 args))
(password (nth 2 args))
(csv-pathname (nth 3 args)))
(scrape username password csv-pathname))
(show-usage)))


There are several areas I'm unsure about, & would particularly appreciate feedback on:




  • use of let & setq (I've got this wrong in the past)

  • structure, naming & comments (as a Lisper, would you like to inherit this codebase?)


The entire app is here on GitHub if you're interested.










share|improve this question
























  • I always feel interested in every CL projects . Hey thank to your codebase, now I know how to use defpackage :D
    – Dark Cloud
    May 6 '11 at 16:19
















6














I have written a simple web-scraper in Common Lisp, & would greatly appreciate any feedback:



(defpackage :myfitnessdata
(:use :common-lisp)
(:export #:main))

(in-package :myfitnessdata)

(require :sb-posix)
(load (merge-pathnames "quicklisp/setup.lisp" (user-homedir-pathname)))
(ql:quickload '("drakma"
"closure-html"
"cxml-stp"
"net-telent-date"))

(defun show-usage ()
(format t "MyFitnessData - a CSV web scraper for the MyFitnessPal website.~%")
;; snip
(format t "'c:\Users\bob\weights.csv', overwriting it if it exists.~%"))

(defun login (username password)
"Logs in to www.myfitnesspal.com. Returns a cookie-jar containing authentication details."
(let ((cookie-jar (make-instance 'drakma:cookie-jar)))
(drakma:http-request "http://www.myfitnesspal.com/account/login"
:method :post
:parameters `(("username" . ,username) ("password" . ,password))
:cookie-jar cookie-jar)
cookie-jar))

(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(let ((logged-in? nil))
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
(if (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
(drakma:cookie-value cookie))
(setq logged-in? t)))
logged-in?))

(defun get-page (page-num cookie-jar)
"Downloads a potentially invalid HTML page containing data to scrape. Returns a string containing the HTML."
(let ((url (concatenate 'string "http://www.myfitnesspal.com/measurements/edit?type=1&page=" (write-to-string page-num))))
(let ((body (drakma:http-request url :cookie-jar cookie-jar)))
(if (search "No measurements found." body)
nil
body))))

(defun scrape-body (body)
"Scrapes data from a potentially invalid HTML document, returning a list of lists of values."
(let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
(let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
(scrape-xhtml xhtml-tree))))

(defun scrape-xhtml (xhtml-tree)
"Scrapes data from an XHTML tree, returning a list of lists of values."
(let ((results nil))
(stp:do-recursively (element xhtml-tree)
(when (and (typep element 'stp:element)
(equal (stp:local-name element) "tr"))
(if (scrape-row element)
(setq results (append results (list (scrape-row element)))))))
results))

(defun scrape-row (row)
"Scrapes data from a table row into a list of values."
(if (equal 4 (stp:number-of-children row))
(let ((measurement-type (nth-child-data 0 row))
(measurement-date (nth-child-data 1 row))
(measurement-value (nth-child-data 2 row)))
(if (not (equal measurement-type "Measurement"))
(list measurement-date measurement-value)))))

(defun nth-child-data (number row)
(stp:data (stp:nth-child 0 (stp:nth-child number row))))

(defun recursive-scrape-page (page-num cookie-jar)
"Recursively scrapes data from a page and all successive pages. Returns a list of lists of values."
(let ((body (get-page page-num cookie-jar)))
(if body
(append (scrape-body body)
(recursive-scrape-page (+ 1 page-num) cookie-jar)))))

(defun show-login-failure ()
(format t "Login failed.~%"))

(defun write-csv (data csv-pathname)
"Takes a list of lists of values, converts them to CSV, and writes them to a file."
(with-open-file (stream csv-pathname
:direction :output
:if-exists :overwrite
:if-does-not-exist :create)
(format stream (make-csv data))))

(defun separate-values (value-list)
"Takes a list of values, and returns a string containing a CSV row that represents the values."
(format nil "~{~A~^,~}" value-list))

(defun make-csv (list)
"Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
(let ((csv "")
(sorted-list (sort list #'first-column-as-date-ascending)))
(mapcar (lambda (row) (setq csv (concatenate 'string csv (separate-values row) (format nil "~%")))) sorted-list)
csv))

(defun first-column-as-date-ascending (first-row second-row)
"Compares two rows by their first column, which is parsed as a time."
(< (net.telent.date:parse-time (car first-row))
(net.telent.date:parse-time (car second-row))))

(defun scrape (username password csv-pathname)
"Attempts to log in, and if successful scrapes all data to the file specified by csv-pathname."
(let ((cookie-jar (login username password)))
(if (logged-in? cookie-jar)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
(show-login-failure))))

(defun main (args)
"The entry point for the application when compiled with buildapp."
(if (= (length args) 4)
(let ((username (nth 1 args))
(password (nth 2 args))
(csv-pathname (nth 3 args)))
(scrape username password csv-pathname))
(show-usage)))


There are several areas I'm unsure about, & would particularly appreciate feedback on:




  • use of let & setq (I've got this wrong in the past)

  • structure, naming & comments (as a Lisper, would you like to inherit this codebase?)


The entire app is here on GitHub if you're interested.










share|improve this question
























  • I always feel interested in every CL projects . Hey thank to your codebase, now I know how to use defpackage :D
    – Dark Cloud
    May 6 '11 at 16:19














6












6








6


4





I have written a simple web-scraper in Common Lisp, & would greatly appreciate any feedback:



(defpackage :myfitnessdata
(:use :common-lisp)
(:export #:main))

(in-package :myfitnessdata)

(require :sb-posix)
(load (merge-pathnames "quicklisp/setup.lisp" (user-homedir-pathname)))
(ql:quickload '("drakma"
"closure-html"
"cxml-stp"
"net-telent-date"))

(defun show-usage ()
(format t "MyFitnessData - a CSV web scraper for the MyFitnessPal website.~%")
;; snip
(format t "'c:\Users\bob\weights.csv', overwriting it if it exists.~%"))

(defun login (username password)
"Logs in to www.myfitnesspal.com. Returns a cookie-jar containing authentication details."
(let ((cookie-jar (make-instance 'drakma:cookie-jar)))
(drakma:http-request "http://www.myfitnesspal.com/account/login"
:method :post
:parameters `(("username" . ,username) ("password" . ,password))
:cookie-jar cookie-jar)
cookie-jar))

(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(let ((logged-in? nil))
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
(if (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
(drakma:cookie-value cookie))
(setq logged-in? t)))
logged-in?))

(defun get-page (page-num cookie-jar)
"Downloads a potentially invalid HTML page containing data to scrape. Returns a string containing the HTML."
(let ((url (concatenate 'string "http://www.myfitnesspal.com/measurements/edit?type=1&page=" (write-to-string page-num))))
(let ((body (drakma:http-request url :cookie-jar cookie-jar)))
(if (search "No measurements found." body)
nil
body))))

(defun scrape-body (body)
"Scrapes data from a potentially invalid HTML document, returning a list of lists of values."
(let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
(let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
(scrape-xhtml xhtml-tree))))

(defun scrape-xhtml (xhtml-tree)
"Scrapes data from an XHTML tree, returning a list of lists of values."
(let ((results nil))
(stp:do-recursively (element xhtml-tree)
(when (and (typep element 'stp:element)
(equal (stp:local-name element) "tr"))
(if (scrape-row element)
(setq results (append results (list (scrape-row element)))))))
results))

(defun scrape-row (row)
"Scrapes data from a table row into a list of values."
(if (equal 4 (stp:number-of-children row))
(let ((measurement-type (nth-child-data 0 row))
(measurement-date (nth-child-data 1 row))
(measurement-value (nth-child-data 2 row)))
(if (not (equal measurement-type "Measurement"))
(list measurement-date measurement-value)))))

(defun nth-child-data (number row)
(stp:data (stp:nth-child 0 (stp:nth-child number row))))

(defun recursive-scrape-page (page-num cookie-jar)
"Recursively scrapes data from a page and all successive pages. Returns a list of lists of values."
(let ((body (get-page page-num cookie-jar)))
(if body
(append (scrape-body body)
(recursive-scrape-page (+ 1 page-num) cookie-jar)))))

(defun show-login-failure ()
(format t "Login failed.~%"))

(defun write-csv (data csv-pathname)
"Takes a list of lists of values, converts them to CSV, and writes them to a file."
(with-open-file (stream csv-pathname
:direction :output
:if-exists :overwrite
:if-does-not-exist :create)
(format stream (make-csv data))))

(defun separate-values (value-list)
"Takes a list of values, and returns a string containing a CSV row that represents the values."
(format nil "~{~A~^,~}" value-list))

(defun make-csv (list)
"Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
(let ((csv "")
(sorted-list (sort list #'first-column-as-date-ascending)))
(mapcar (lambda (row) (setq csv (concatenate 'string csv (separate-values row) (format nil "~%")))) sorted-list)
csv))

(defun first-column-as-date-ascending (first-row second-row)
"Compares two rows by their first column, which is parsed as a time."
(< (net.telent.date:parse-time (car first-row))
(net.telent.date:parse-time (car second-row))))

(defun scrape (username password csv-pathname)
"Attempts to log in, and if successful scrapes all data to the file specified by csv-pathname."
(let ((cookie-jar (login username password)))
(if (logged-in? cookie-jar)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
(show-login-failure))))

(defun main (args)
"The entry point for the application when compiled with buildapp."
(if (= (length args) 4)
(let ((username (nth 1 args))
(password (nth 2 args))
(csv-pathname (nth 3 args)))
(scrape username password csv-pathname))
(show-usage)))


There are several areas I'm unsure about, & would particularly appreciate feedback on:




  • use of let & setq (I've got this wrong in the past)

  • structure, naming & comments (as a Lisper, would you like to inherit this codebase?)


The entire app is here on GitHub if you're interested.










share|improve this question















I have written a simple web-scraper in Common Lisp, & would greatly appreciate any feedback:



(defpackage :myfitnessdata
(:use :common-lisp)
(:export #:main))

(in-package :myfitnessdata)

(require :sb-posix)
(load (merge-pathnames "quicklisp/setup.lisp" (user-homedir-pathname)))
(ql:quickload '("drakma"
"closure-html"
"cxml-stp"
"net-telent-date"))

(defun show-usage ()
(format t "MyFitnessData - a CSV web scraper for the MyFitnessPal website.~%")
;; snip
(format t "'c:\Users\bob\weights.csv', overwriting it if it exists.~%"))

(defun login (username password)
"Logs in to www.myfitnesspal.com. Returns a cookie-jar containing authentication details."
(let ((cookie-jar (make-instance 'drakma:cookie-jar)))
(drakma:http-request "http://www.myfitnesspal.com/account/login"
:method :post
:parameters `(("username" . ,username) ("password" . ,password))
:cookie-jar cookie-jar)
cookie-jar))

(defun logged-in? (cookie-jar)
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(let ((logged-in? nil))
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
(if (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
(drakma:cookie-value cookie))
(setq logged-in? t)))
logged-in?))

(defun get-page (page-num cookie-jar)
"Downloads a potentially invalid HTML page containing data to scrape. Returns a string containing the HTML."
(let ((url (concatenate 'string "http://www.myfitnesspal.com/measurements/edit?type=1&page=" (write-to-string page-num))))
(let ((body (drakma:http-request url :cookie-jar cookie-jar)))
(if (search "No measurements found." body)
nil
body))))

(defun scrape-body (body)
"Scrapes data from a potentially invalid HTML document, returning a list of lists of values."
(let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
(let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
(scrape-xhtml xhtml-tree))))

(defun scrape-xhtml (xhtml-tree)
"Scrapes data from an XHTML tree, returning a list of lists of values."
(let ((results nil))
(stp:do-recursively (element xhtml-tree)
(when (and (typep element 'stp:element)
(equal (stp:local-name element) "tr"))
(if (scrape-row element)
(setq results (append results (list (scrape-row element)))))))
results))

(defun scrape-row (row)
"Scrapes data from a table row into a list of values."
(if (equal 4 (stp:number-of-children row))
(let ((measurement-type (nth-child-data 0 row))
(measurement-date (nth-child-data 1 row))
(measurement-value (nth-child-data 2 row)))
(if (not (equal measurement-type "Measurement"))
(list measurement-date measurement-value)))))

(defun nth-child-data (number row)
(stp:data (stp:nth-child 0 (stp:nth-child number row))))

(defun recursive-scrape-page (page-num cookie-jar)
"Recursively scrapes data from a page and all successive pages. Returns a list of lists of values."
(let ((body (get-page page-num cookie-jar)))
(if body
(append (scrape-body body)
(recursive-scrape-page (+ 1 page-num) cookie-jar)))))

(defun show-login-failure ()
(format t "Login failed.~%"))

(defun write-csv (data csv-pathname)
"Takes a list of lists of values, converts them to CSV, and writes them to a file."
(with-open-file (stream csv-pathname
:direction :output
:if-exists :overwrite
:if-does-not-exist :create)
(format stream (make-csv data))))

(defun separate-values (value-list)
"Takes a list of values, and returns a string containing a CSV row that represents the values."
(format nil "~{~A~^,~}" value-list))

(defun make-csv (list)
"Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
(let ((csv "")
(sorted-list (sort list #'first-column-as-date-ascending)))
(mapcar (lambda (row) (setq csv (concatenate 'string csv (separate-values row) (format nil "~%")))) sorted-list)
csv))

(defun first-column-as-date-ascending (first-row second-row)
"Compares two rows by their first column, which is parsed as a time."
(< (net.telent.date:parse-time (car first-row))
(net.telent.date:parse-time (car second-row))))

(defun scrape (username password csv-pathname)
"Attempts to log in, and if successful scrapes all data to the file specified by csv-pathname."
(let ((cookie-jar (login username password)))
(if (logged-in? cookie-jar)
(write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
(show-login-failure))))

(defun main (args)
"The entry point for the application when compiled with buildapp."
(if (= (length args) 4)
(let ((username (nth 1 args))
(password (nth 2 args))
(csv-pathname (nth 3 args)))
(scrape username password csv-pathname))
(show-usage)))


There are several areas I'm unsure about, & would particularly appreciate feedback on:




  • use of let & setq (I've got this wrong in the past)

  • structure, naming & comments (as a Lisper, would you like to inherit this codebase?)


The entire app is here on GitHub if you're interested.







common-lisp






share|improve this question















share|improve this question













share|improve this question




share|improve this question








edited Apr 13 '17 at 12:40









Community

1




1










asked May 6 '11 at 9:33









Duncan BayneDuncan Bayne

280111




280111












  • I always feel interested in every CL projects . Hey thank to your codebase, now I know how to use defpackage :D
    – Dark Cloud
    May 6 '11 at 16:19


















  • I always feel interested in every CL projects . Hey thank to your codebase, now I know how to use defpackage :D
    – Dark Cloud
    May 6 '11 at 16:19
















I always feel interested in every CL projects . Hey thank to your codebase, now I know how to use defpackage :D
– Dark Cloud
May 6 '11 at 16:19




I always feel interested in every CL projects . Hey thank to your codebase, now I know how to use defpackage :D
– Dark Cloud
May 6 '11 at 16:19










2 Answers
2






active

oldest

votes


















6














You might want to take a look at defining asdf systems instead of using quicklisp to load dependencies internally.



The standard way of doing this is to set up an asd file. Here's a decent walk-through of that process. It's more verbose than ql:quickload, but it lets people who don't have quicklisp use your package regardless.



On second thought, screw those guys, keep it up.





(defun logged-in? (cookie-jar)       
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(let ((logged-in? nil))
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
(if (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
(drakma:cookie-value cookie))
(setq logged-in? t)))
logged-in?))


There's actually a loop shorthand for "make sure each member of list satisfies predicate". The above function can be written as



(defun logged-in? (cookie-jar)       
"Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
(loop for cookie in (drakma:cookie-jar-cookies cookie-jar)
always (and (equal (drakma:cookie-name cookie) "known_user")
(equal (drakma:cookie-domain cookie) "www.myfitnesspal.com"))))




foo? is the Scheme convention for predicates. The common CL conventions are foop or foo-p. Personally, I prefer foo? too, just be aware that it's not standard.





...
(sorted-list (sort list #'first-column-as-date-ascending)))
...


This can get you into trouble. The Common Lisp sort should really be named sort!, because it's destructive (so sorted-list will now contain a sorted list, but list won't still be the unsorted list, and isn't guaranteed to be the complete sequence anymore). If you might use list again later, instead do



...
(sorted-list (sort (copy-list list) #'first-column-as-date-ascending)))
...




(if (search "No measurements found." body)
nil
body)


Can be written as



(unless (search "No measurements found." body) body)




EDIT:



format can accept nested iterations in the directive, so you can eliminate separate-values by writing make-csv as



(defun make-csv (list)
"Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
(let ((sorted-list (sort list #'first-column-as-date-ascending)))
(format nil "~{~{~A~^,~}~^~%~}" sorted-list)))


You could eliminate make-csv entirely by putting the above sort+directive directly into write-csv (this would also save you a trip through the CSV string, which may or may not make a significant difference).





recursive-scrape-page can be simplified down to



(defun scrape-page (page-num cookie-jar)
(loop for i from page-num
if (get-page i cookie-jar) collect it into pg
else return pg))


As a rule, Common Lisp doesn't guarantee tail-calls the way Scheme does, so it's generally a better idea to use a loop than raw recursion. SBCL does support some tail calls, but it isn't guaranteed (though this situation looks simple enough that it just might; do some profiling and compare).



You should be able to simplify scrape-xhtml in a similar way to eliminate (let ((results nil)).



Note that I haven't tested or profiled any of this since I don't have a "MyFitnessPal" account. Check that it works first.





EDIT the Second:



 ...
(let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
(let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
...


You use this nested let idiom in a couple of places. I assume this is just because the value of xhtml-tree depends on the value of valid-html. In this case, you can instead write



 ...
(let* ((valid-xhtml (chtml:parse body (cxml:make-string-sink)))
(xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
...





share|improve this answer































    2














    Be careful to indent your code properly to make it more readable. For instance:



    (let ((cookie-jar (login username password)))
    (if (logged-in? cookie-jar)
    (write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
    (show-login-failure)))


    That piece of code could by the way possibly be improved by an idiomatic WITH-VALID-LOGIN macro (if you want to practice). It could become...



    (with-valid-login (cookie-jar username password)
    (write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname))


    ...with a macro definition like:



    (defmacro with-valid-login ((jar user password) &body body)
    `(let ((,jar (login ,user ,password)))
    (if (logged-in? ,jar)
    (progn ,@body)
    (show-login-failure))))





    share|improve this answer























      Your Answer





      StackExchange.ifUsing("editor", function () {
      return StackExchange.using("mathjaxEditing", function () {
      StackExchange.MarkdownEditor.creationCallbacks.add(function (editor, postfix) {
      StackExchange.mathjaxEditing.prepareWmdForMathJax(editor, postfix, [["\$", "\$"]]);
      });
      });
      }, "mathjax-editing");

      StackExchange.ifUsing("editor", function () {
      StackExchange.using("externalEditor", function () {
      StackExchange.using("snippets", function () {
      StackExchange.snippets.init();
      });
      });
      }, "code-snippets");

      StackExchange.ready(function() {
      var channelOptions = {
      tags: "".split(" "),
      id: "196"
      };
      initTagRenderer("".split(" "), "".split(" "), channelOptions);

      StackExchange.using("externalEditor", function() {
      // Have to fire editor after snippets, if snippets enabled
      if (StackExchange.settings.snippets.snippetsEnabled) {
      StackExchange.using("snippets", function() {
      createEditor();
      });
      }
      else {
      createEditor();
      }
      });

      function createEditor() {
      StackExchange.prepareEditor({
      heartbeatType: 'answer',
      autoActivateHeartbeat: false,
      convertImagesToLinks: false,
      noModals: true,
      showLowRepImageUploadWarning: true,
      reputationToPostImages: null,
      bindNavPrevention: true,
      postfix: "",
      imageUploader: {
      brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
      contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
      allowUrls: true
      },
      onDemand: true,
      discardSelector: ".discard-answer"
      ,immediatelyShowMarkdownHelp:true
      });


      }
      });














      draft saved

      draft discarded


















      StackExchange.ready(
      function () {
      StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f2277%2fsimple-web-scraper-in-common-lisp-sbcl%23new-answer', 'question_page');
      }
      );

      Post as a guest















      Required, but never shown

























      2 Answers
      2






      active

      oldest

      votes








      2 Answers
      2






      active

      oldest

      votes









      active

      oldest

      votes






      active

      oldest

      votes









      6














      You might want to take a look at defining asdf systems instead of using quicklisp to load dependencies internally.



      The standard way of doing this is to set up an asd file. Here's a decent walk-through of that process. It's more verbose than ql:quickload, but it lets people who don't have quicklisp use your package regardless.



      On second thought, screw those guys, keep it up.





      (defun logged-in? (cookie-jar)       
      "Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
      (let ((logged-in? nil))
      (loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
      (if (and (equal (drakma:cookie-name cookie) "known_user")
      (equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
      (drakma:cookie-value cookie))
      (setq logged-in? t)))
      logged-in?))


      There's actually a loop shorthand for "make sure each member of list satisfies predicate". The above function can be written as



      (defun logged-in? (cookie-jar)       
      "Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
      (loop for cookie in (drakma:cookie-jar-cookies cookie-jar)
      always (and (equal (drakma:cookie-name cookie) "known_user")
      (equal (drakma:cookie-domain cookie) "www.myfitnesspal.com"))))




      foo? is the Scheme convention for predicates. The common CL conventions are foop or foo-p. Personally, I prefer foo? too, just be aware that it's not standard.





      ...
      (sorted-list (sort list #'first-column-as-date-ascending)))
      ...


      This can get you into trouble. The Common Lisp sort should really be named sort!, because it's destructive (so sorted-list will now contain a sorted list, but list won't still be the unsorted list, and isn't guaranteed to be the complete sequence anymore). If you might use list again later, instead do



      ...
      (sorted-list (sort (copy-list list) #'first-column-as-date-ascending)))
      ...




      (if (search "No measurements found." body)
      nil
      body)


      Can be written as



      (unless (search "No measurements found." body) body)




      EDIT:



      format can accept nested iterations in the directive, so you can eliminate separate-values by writing make-csv as



      (defun make-csv (list)
      "Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
      (let ((sorted-list (sort list #'first-column-as-date-ascending)))
      (format nil "~{~{~A~^,~}~^~%~}" sorted-list)))


      You could eliminate make-csv entirely by putting the above sort+directive directly into write-csv (this would also save you a trip through the CSV string, which may or may not make a significant difference).





      recursive-scrape-page can be simplified down to



      (defun scrape-page (page-num cookie-jar)
      (loop for i from page-num
      if (get-page i cookie-jar) collect it into pg
      else return pg))


      As a rule, Common Lisp doesn't guarantee tail-calls the way Scheme does, so it's generally a better idea to use a loop than raw recursion. SBCL does support some tail calls, but it isn't guaranteed (though this situation looks simple enough that it just might; do some profiling and compare).



      You should be able to simplify scrape-xhtml in a similar way to eliminate (let ((results nil)).



      Note that I haven't tested or profiled any of this since I don't have a "MyFitnessPal" account. Check that it works first.





      EDIT the Second:



       ...
      (let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
      (let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
      ...


      You use this nested let idiom in a couple of places. I assume this is just because the value of xhtml-tree depends on the value of valid-html. In this case, you can instead write



       ...
      (let* ((valid-xhtml (chtml:parse body (cxml:make-string-sink)))
      (xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
      ...





      share|improve this answer




























        6














        You might want to take a look at defining asdf systems instead of using quicklisp to load dependencies internally.



        The standard way of doing this is to set up an asd file. Here's a decent walk-through of that process. It's more verbose than ql:quickload, but it lets people who don't have quicklisp use your package regardless.



        On second thought, screw those guys, keep it up.





        (defun logged-in? (cookie-jar)       
        "Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
        (let ((logged-in? nil))
        (loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
        (if (and (equal (drakma:cookie-name cookie) "known_user")
        (equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
        (drakma:cookie-value cookie))
        (setq logged-in? t)))
        logged-in?))


        There's actually a loop shorthand for "make sure each member of list satisfies predicate". The above function can be written as



        (defun logged-in? (cookie-jar)       
        "Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
        (loop for cookie in (drakma:cookie-jar-cookies cookie-jar)
        always (and (equal (drakma:cookie-name cookie) "known_user")
        (equal (drakma:cookie-domain cookie) "www.myfitnesspal.com"))))




        foo? is the Scheme convention for predicates. The common CL conventions are foop or foo-p. Personally, I prefer foo? too, just be aware that it's not standard.





        ...
        (sorted-list (sort list #'first-column-as-date-ascending)))
        ...


        This can get you into trouble. The Common Lisp sort should really be named sort!, because it's destructive (so sorted-list will now contain a sorted list, but list won't still be the unsorted list, and isn't guaranteed to be the complete sequence anymore). If you might use list again later, instead do



        ...
        (sorted-list (sort (copy-list list) #'first-column-as-date-ascending)))
        ...




        (if (search "No measurements found." body)
        nil
        body)


        Can be written as



        (unless (search "No measurements found." body) body)




        EDIT:



        format can accept nested iterations in the directive, so you can eliminate separate-values by writing make-csv as



        (defun make-csv (list)
        "Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
        (let ((sorted-list (sort list #'first-column-as-date-ascending)))
        (format nil "~{~{~A~^,~}~^~%~}" sorted-list)))


        You could eliminate make-csv entirely by putting the above sort+directive directly into write-csv (this would also save you a trip through the CSV string, which may or may not make a significant difference).





        recursive-scrape-page can be simplified down to



        (defun scrape-page (page-num cookie-jar)
        (loop for i from page-num
        if (get-page i cookie-jar) collect it into pg
        else return pg))


        As a rule, Common Lisp doesn't guarantee tail-calls the way Scheme does, so it's generally a better idea to use a loop than raw recursion. SBCL does support some tail calls, but it isn't guaranteed (though this situation looks simple enough that it just might; do some profiling and compare).



        You should be able to simplify scrape-xhtml in a similar way to eliminate (let ((results nil)).



        Note that I haven't tested or profiled any of this since I don't have a "MyFitnessPal" account. Check that it works first.





        EDIT the Second:



         ...
        (let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
        (let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
        ...


        You use this nested let idiom in a couple of places. I assume this is just because the value of xhtml-tree depends on the value of valid-html. In this case, you can instead write



         ...
        (let* ((valid-xhtml (chtml:parse body (cxml:make-string-sink)))
        (xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
        ...





        share|improve this answer


























          6












          6








          6






          You might want to take a look at defining asdf systems instead of using quicklisp to load dependencies internally.



          The standard way of doing this is to set up an asd file. Here's a decent walk-through of that process. It's more verbose than ql:quickload, but it lets people who don't have quicklisp use your package regardless.



          On second thought, screw those guys, keep it up.





          (defun logged-in? (cookie-jar)       
          "Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
          (let ((logged-in? nil))
          (loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
          (if (and (equal (drakma:cookie-name cookie) "known_user")
          (equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
          (drakma:cookie-value cookie))
          (setq logged-in? t)))
          logged-in?))


          There's actually a loop shorthand for "make sure each member of list satisfies predicate". The above function can be written as



          (defun logged-in? (cookie-jar)       
          "Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
          (loop for cookie in (drakma:cookie-jar-cookies cookie-jar)
          always (and (equal (drakma:cookie-name cookie) "known_user")
          (equal (drakma:cookie-domain cookie) "www.myfitnesspal.com"))))




          foo? is the Scheme convention for predicates. The common CL conventions are foop or foo-p. Personally, I prefer foo? too, just be aware that it's not standard.





          ...
          (sorted-list (sort list #'first-column-as-date-ascending)))
          ...


          This can get you into trouble. The Common Lisp sort should really be named sort!, because it's destructive (so sorted-list will now contain a sorted list, but list won't still be the unsorted list, and isn't guaranteed to be the complete sequence anymore). If you might use list again later, instead do



          ...
          (sorted-list (sort (copy-list list) #'first-column-as-date-ascending)))
          ...




          (if (search "No measurements found." body)
          nil
          body)


          Can be written as



          (unless (search "No measurements found." body) body)




          EDIT:



          format can accept nested iterations in the directive, so you can eliminate separate-values by writing make-csv as



          (defun make-csv (list)
          "Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
          (let ((sorted-list (sort list #'first-column-as-date-ascending)))
          (format nil "~{~{~A~^,~}~^~%~}" sorted-list)))


          You could eliminate make-csv entirely by putting the above sort+directive directly into write-csv (this would also save you a trip through the CSV string, which may or may not make a significant difference).





          recursive-scrape-page can be simplified down to



          (defun scrape-page (page-num cookie-jar)
          (loop for i from page-num
          if (get-page i cookie-jar) collect it into pg
          else return pg))


          As a rule, Common Lisp doesn't guarantee tail-calls the way Scheme does, so it's generally a better idea to use a loop than raw recursion. SBCL does support some tail calls, but it isn't guaranteed (though this situation looks simple enough that it just might; do some profiling and compare).



          You should be able to simplify scrape-xhtml in a similar way to eliminate (let ((results nil)).



          Note that I haven't tested or profiled any of this since I don't have a "MyFitnessPal" account. Check that it works first.





          EDIT the Second:



           ...
          (let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
          (let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
          ...


          You use this nested let idiom in a couple of places. I assume this is just because the value of xhtml-tree depends on the value of valid-html. In this case, you can instead write



           ...
          (let* ((valid-xhtml (chtml:parse body (cxml:make-string-sink)))
          (xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
          ...





          share|improve this answer














          You might want to take a look at defining asdf systems instead of using quicklisp to load dependencies internally.



          The standard way of doing this is to set up an asd file. Here's a decent walk-through of that process. It's more verbose than ql:quickload, but it lets people who don't have quicklisp use your package regardless.



          On second thought, screw those guys, keep it up.





          (defun logged-in? (cookie-jar)       
          "Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
          (let ((logged-in? nil))
          (loop for cookie in (drakma:cookie-jar-cookies cookie-jar) do
          (if (and (equal (drakma:cookie-name cookie) "known_user")
          (equal (drakma:cookie-domain cookie) "www.myfitnesspal.com")
          (drakma:cookie-value cookie))
          (setq logged-in? t)))
          logged-in?))


          There's actually a loop shorthand for "make sure each member of list satisfies predicate". The above function can be written as



          (defun logged-in? (cookie-jar)       
          "Returns true if a cookie-jar contains login information for www.myfitnesspal.com, and nil otherwise."
          (loop for cookie in (drakma:cookie-jar-cookies cookie-jar)
          always (and (equal (drakma:cookie-name cookie) "known_user")
          (equal (drakma:cookie-domain cookie) "www.myfitnesspal.com"))))




          foo? is the Scheme convention for predicates. The common CL conventions are foop or foo-p. Personally, I prefer foo? too, just be aware that it's not standard.





          ...
          (sorted-list (sort list #'first-column-as-date-ascending)))
          ...


          This can get you into trouble. The Common Lisp sort should really be named sort!, because it's destructive (so sorted-list will now contain a sorted list, but list won't still be the unsorted list, and isn't guaranteed to be the complete sequence anymore). If you might use list again later, instead do



          ...
          (sorted-list (sort (copy-list list) #'first-column-as-date-ascending)))
          ...




          (if (search "No measurements found." body)
          nil
          body)


          Can be written as



          (unless (search "No measurements found." body) body)




          EDIT:



          format can accept nested iterations in the directive, so you can eliminate separate-values by writing make-csv as



          (defun make-csv (list)
          "Takes a list of lists of values, and returns a string containing a CSV file representing each top-level list as a row."
          (let ((sorted-list (sort list #'first-column-as-date-ascending)))
          (format nil "~{~{~A~^,~}~^~%~}" sorted-list)))


          You could eliminate make-csv entirely by putting the above sort+directive directly into write-csv (this would also save you a trip through the CSV string, which may or may not make a significant difference).





          recursive-scrape-page can be simplified down to



          (defun scrape-page (page-num cookie-jar)
          (loop for i from page-num
          if (get-page i cookie-jar) collect it into pg
          else return pg))


          As a rule, Common Lisp doesn't guarantee tail-calls the way Scheme does, so it's generally a better idea to use a loop than raw recursion. SBCL does support some tail calls, but it isn't guaranteed (though this situation looks simple enough that it just might; do some profiling and compare).



          You should be able to simplify scrape-xhtml in a similar way to eliminate (let ((results nil)).



          Note that I haven't tested or profiled any of this since I don't have a "MyFitnessPal" account. Check that it works first.





          EDIT the Second:



           ...
          (let ((valid-xhtml (chtml:parse body (cxml:make-string-sink))))
          (let ((xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
          ...


          You use this nested let idiom in a couple of places. I assume this is just because the value of xhtml-tree depends on the value of valid-html. In this case, you can instead write



           ...
          (let* ((valid-xhtml (chtml:parse body (cxml:make-string-sink)))
          (xhtml-tree (chtml:parse valid-xhtml (cxml-stp:make-builder))))
          ...






          share|improve this answer














          share|improve this answer



          share|improve this answer








          edited May 7 '11 at 5:07

























          answered May 6 '11 at 19:00









          InaimathiInaimathi

          1,693821




          1,693821

























              2














              Be careful to indent your code properly to make it more readable. For instance:



              (let ((cookie-jar (login username password)))
              (if (logged-in? cookie-jar)
              (write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
              (show-login-failure)))


              That piece of code could by the way possibly be improved by an idiomatic WITH-VALID-LOGIN macro (if you want to practice). It could become...



              (with-valid-login (cookie-jar username password)
              (write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname))


              ...with a macro definition like:



              (defmacro with-valid-login ((jar user password) &body body)
              `(let ((,jar (login ,user ,password)))
              (if (logged-in? ,jar)
              (progn ,@body)
              (show-login-failure))))





              share|improve this answer




























                2














                Be careful to indent your code properly to make it more readable. For instance:



                (let ((cookie-jar (login username password)))
                (if (logged-in? cookie-jar)
                (write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
                (show-login-failure)))


                That piece of code could by the way possibly be improved by an idiomatic WITH-VALID-LOGIN macro (if you want to practice). It could become...



                (with-valid-login (cookie-jar username password)
                (write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname))


                ...with a macro definition like:



                (defmacro with-valid-login ((jar user password) &body body)
                `(let ((,jar (login ,user ,password)))
                (if (logged-in? ,jar)
                (progn ,@body)
                (show-login-failure))))





                share|improve this answer


























                  2












                  2








                  2






                  Be careful to indent your code properly to make it more readable. For instance:



                  (let ((cookie-jar (login username password)))
                  (if (logged-in? cookie-jar)
                  (write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
                  (show-login-failure)))


                  That piece of code could by the way possibly be improved by an idiomatic WITH-VALID-LOGIN macro (if you want to practice). It could become...



                  (with-valid-login (cookie-jar username password)
                  (write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname))


                  ...with a macro definition like:



                  (defmacro with-valid-login ((jar user password) &body body)
                  `(let ((,jar (login ,user ,password)))
                  (if (logged-in? ,jar)
                  (progn ,@body)
                  (show-login-failure))))





                  share|improve this answer














                  Be careful to indent your code properly to make it more readable. For instance:



                  (let ((cookie-jar (login username password)))
                  (if (logged-in? cookie-jar)
                  (write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname)
                  (show-login-failure)))


                  That piece of code could by the way possibly be improved by an idiomatic WITH-VALID-LOGIN macro (if you want to practice). It could become...



                  (with-valid-login (cookie-jar username password)
                  (write-csv (recursive-scrape-page 1 cookie-jar) csv-pathname))


                  ...with a macro definition like:



                  (defmacro with-valid-login ((jar user password) &body body)
                  `(let ((,jar (login ,user ,password)))
                  (if (logged-in? ,jar)
                  (progn ,@body)
                  (show-login-failure))))






                  share|improve this answer














                  share|improve this answer



                  share|improve this answer








                  edited 2 days ago









                  devon

                  1034




                  1034










                  answered Jan 12 '16 at 7:41









                  TorbjørnTorbjørn

                  362111




                  362111






























                      draft saved

                      draft discarded




















































                      Thanks for contributing an answer to Code Review Stack Exchange!


                      • Please be sure to answer the question. Provide details and share your research!

                      But avoid



                      • Asking for help, clarification, or responding to other answers.

                      • Making statements based on opinion; back them up with references or personal experience.


                      Use MathJax to format equations. MathJax reference.


                      To learn more, see our tips on writing great answers.





                      Some of your past answers have not been well-received, and you're in danger of being blocked from answering.


                      Please pay close attention to the following guidance:


                      • Please be sure to answer the question. Provide details and share your research!

                      But avoid



                      • Asking for help, clarification, or responding to other answers.

                      • Making statements based on opinion; back them up with references or personal experience.


                      To learn more, see our tips on writing great answers.




                      draft saved


                      draft discarded














                      StackExchange.ready(
                      function () {
                      StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f2277%2fsimple-web-scraper-in-common-lisp-sbcl%23new-answer', 'question_page');
                      }
                      );

                      Post as a guest















                      Required, but never shown





















































                      Required, but never shown














                      Required, but never shown












                      Required, but never shown







                      Required, but never shown

































                      Required, but never shown














                      Required, but never shown












                      Required, but never shown







                      Required, but never shown







                      Popular posts from this blog

                      Список кардиналов, возведённых папой римским Каликстом III

                      Deduzione

                      Mysql.sock missing - “Can't connect to local MySQL server through socket”