C0 code coverage information
Generated on Tue Oct 16 11:40:48 -0400 2007 with rcov 0.8.0
Code reported as executed by Ruby looks like this...
and this: this line is also marked as covered.
Lines considered as run by rcov, but not reported by Ruby, look like this,
and this: these lines were inferred by rcov (using simple heuristics).
Finally, here's a line marked as not executed.
1 # Copyright (C) 2007 Marco Costantini
2 # based on ibs_it.rb by Claudio Belotti
3 #
4 # Alexandria is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU General Public License as
6 # published by the Free Software Foundation; either version 2 of the
7 # License, or (at your option) any later version.
8 #
9 # Alexandria is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # General Public License for more details.
13 #
14 # You should have received a copy of the GNU General Public
15 # License along with Alexandria; see the file COPYING. If not,
16 # write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 # Boston, MA 02111-1307, USA.
18
19 # http://en.wikipedia.org/wiki/WorldCat
20 # See http://www.oclc.org/worldcat/policies/terms/
21
require 'cgi'
require 'fileutils'
require 'net/http'
require 'open-uri'
26
27 module Alexandria
28 class BookProviders
29 class WorldcatProvider < GenericProvider
# Root of the WorldCat site; every request URL and cover URL is built from it.
BASE_URI = "http://worldcat.org".freeze
# Directory (inside the Alexandria library directory) where downloaded cover
# images are stored as "<isbn>.tmp" until they are removed at exit.
CACHE_DIR = File.join(Alexandria::Library::DIR, '.worldcat_cache').freeze
# Value sent as the HTTP "Referer" header when downloading cover images.
REFERER = BASE_URI
# Registers the provider under the name "Worldcat", makes sure the cover
# cache directory exists and schedules the cache to be emptied at exit.
def initialize
  super("Worldcat", "Worldcat")
  # mkdir_p is a no-op when the directory already exists, so no existence
  # check is needed (File.exists? is gone from current Ruby versions).
  FileUtils.mkdir_p(CACHE_DIR)
  # no preferences for the moment
  at_exit { clean_cache }
end
39
# Queries WorldCat and converts the response into book results.
#
# criterion - the text to look for (ISBN, title, author or keyword).
# type      - one of SEARCH_BY_ISBN, SEARCH_BY_TITLE, SEARCH_BY_AUTHORS,
#             SEARCH_BY_KEYWORD.
#
# For SEARCH_BY_ISBN the return value is whatever to_book returns for the
# item page. For the other types it is an array with one to_book result per
# hit listed on the search page.
#
# Raises InvalidSearchTypeError for an unknown type. For non-ISBN searches,
# raises NoResultsError when nothing was found or when any error occurs while
# fetching or parsing the hits.
def search(criterion, type)
  path = case type
         when SEARCH_BY_ISBN    then "isbn/"
         when SEARCH_BY_TITLE   then "search?q=ti%3A"
         when SEARCH_BY_AUTHORS then "search?q=au%3A"
         when SEARCH_BY_KEYWORD then "search?q="
         else raise InvalidSearchTypeError
         end

  req = BASE_URI + "/" + path + CGI.escape(criterion)
  p req if $DEBUG
  data = transport.get(URI.parse(req))
  return to_book(data) if type == SEARCH_BY_ISBN # rescue raise NoResultsError

  begin
    results = []
    # each_book_page yields the capture list of each hit; only the first
    # element (the OCLC number) is used.
    each_book_page(data) do |code, _|
      results << to_book(transport.get(URI.parse(BASE_URI + "/oclc/" + code)))
    end
    # A result page without any hit must not be reported as a successful,
    # empty search.
    raise NoResultsError if results.empty?
    results
  rescue StandardError
    # Any failure while walking the hits is reported as "no results".
    raise NoResultsError
  end
end
77
# Returns the WorldCat page address for the given book, built from its ISBN,
# or nil when the book has no ISBN (to_book can produce such books).
def url(book)
  isbn = book.isbn
  return nil if isbn.nil?

  BASE_URI + "/isbn/" + isbn
end
81
82 #######
83 private
84 #######
85
# Parses the HTML of a WorldCat item page into a search result.
#
# data - HTML source of the item page (String).
#
# Returns [book] or, when a non-empty cover image could be downloaded into
# CACHE_DIR, [book, cover_path].
#
# Raises NoResultsError when the page is WorldCat's "not found" page and
# RuntimeError when no title can be located in the page.
def to_book(data)
  raise NoResultsError if /<br><p>The page you tried was not found\./.match(data)

  md = /<h1 class="item-title"> ?(<div class=vernacular lang="[^"]+">)?([^<]+)/.match(data)
  raise "no title found in WorldCat page" unless md
  # The captured text is HTML, so entities are decoded; URL-decoding would
  # turn every "+" into a space.
  title = CGI.unescapeHTML(md[2].strip)

  authors = data.scan(/title="Search for more by this author">([^<]+)/).map do |captures|
    CGI.unescapeHTML(captures[0].strip)
  end

  # FIXME: The provider returns the first ISBN found. When searching by
  # ISBN, it should instead return the ISBN searched
  # Example: http://worldcat.org/isbn/9780805335576
  md = /<strong>ISBN: <\/strong>\w+\W+(\d+)\D/.match(data)
  isbn = md ? md[1].strip : nil

  # The provider returns
  # City : Publisher[ ; City2 : Publisher2], *year? [©year]
  # currently the match is not good in case of City2 : Publisher2 and in case of ©year

  # FIXME: if the field 'Publisher' contains "| Other Editions ..." (as for 9788441000469), then this regexp doesn't match;
  # if not (as for 9785941454136), it is OK.
  publisher = nil
  publish_year = nil
  if md = /<div class="item-publisher"><strong>Publisher: <\/strong>(<span class=vernacular lang="[^<]+<\/span>)?[^:<]+ : ([^<]+), [^,<]*(\d\d\d\d).?<\/div>/.match(data)
    publisher = CGI.unescapeHTML(md[2].strip)
    # The third group is exactly four digits; "0000" is treated as unknown.
    publish_year = md[3].to_i
    publish_year = nil if publish_year == 0
  end

  edition = nil
  result = [ Book.new(title, authors, isbn, publisher, publish_year, edition) ]

  # The cover is cached under the ISBN, so it can only be fetched when an
  # ISBN was found.
  if isbn && (md = /<td class="illustration"><img src="([^"]+)/.match(data))
    medium_cover = download_cover(BASE_URI + md[1].strip, isbn)
    result << medium_cover if medium_cover
  end
  result
end

# Downloads cover_url into CACHE_DIR as "<isbn>.tmp".
# Returns the path of the file, or nil (after deleting the file) when the
# download turned out to be empty.
def download_cover(cover_url, isbn)
  medium_cover = CACHE_DIR + "/" + isbn + ".tmp"
  # Binary mode: the payload is image data.
  File.open(medium_cover, "wb") do |file|
    URI.parse(cover_url).open("Referer" => REFERER) { |remote| file.write(remote.read) }
  end

  if File.size(medium_cover) > 0
    puts medium_cover + " has non-0 size" if $DEBUG
    return medium_cover
  end

  puts medium_cover + " has 0 size, removing ..." if $DEBUG
  File.delete(medium_cover)
  nil
end
149
# Yields, for every hit linked on a search result page, the capture list of
# the match: a one-element array holding the OCLC number as a String.
#
# data - HTML source of the search result page (String).
#
# Raises RuntimeError when the page contains no hit link.
def each_book_page(data)
  # Collect first: String#scan with a block returns the scanned string, not
  # the matches, so its result cannot be used to detect "no hits".
  hits = data.scan(/<div class="name"><a href="\/oclc\/(\d+)&/)
  raise "no book links found in WorldCat result page" if hits.empty?

  hits.each { |captures| yield captures }
end
153
# Deletes every "*.tmp" file directly inside CACHE_DIR.
#
# Registered as an at_exit hook by initialize, so it neither changes the
# process working directory nor lets file-system errors escape: a missing
# cache directory or a file that cannot be removed is skipped.
def clean_cache
  Dir.foreach(CACHE_DIR) do |name|
    # Same pattern as before; like a glob, it does not match dot-files.
    next unless File.fnmatch("*.tmp", name)

    puts "removing " + name if $DEBUG
    begin
      File.delete(File.join(CACHE_DIR, name))
    rescue SystemCallError => e
      puts "could not remove " + name + ": " + e.message if $DEBUG
    end
  end
rescue SystemCallError => e
  puts "could not clean " + CACHE_DIR + ": " + e.message if $DEBUG
end
163 end
164 end
165 end
Generated using the rcov code coverage analysis tool for Ruby version 0.8.0.