Skip to content

Commit 9b778be

Browse files
committed
exclude another collection from the feed
1 parent fb19629 commit 9b778be

File tree

1 file changed

+44
-44
lines changed

1 file changed

+44
-44
lines changed

opds.py

Lines changed: 44 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#!/usr/bin/python2.5
1+
#!/usr/bin/env python
22

33
#Copyright(c)2009 Internet Archive. Software license GPL version 3.
44

@@ -30,7 +30,7 @@
3030
'url_base' : '/catalog',
3131
'urnroot' : 'urn:x-internet-archive:bookserver:catalog',
3232
'solr_base' : 'http://se.us.archive.org:8983/solr/select?fl=identifier,title,creator,publicdate,date,contributor,publisher,subject,language,format,month&wt=json',
33-
'query_base' : 'format%3Aabbyy+AND+format%3Ascandata+AND+format%3Apdf+AND+NOT+ocr%3A%22language+not%22+AND+NOT+collection%3Alendinglibrary+AND+NOT+collection%3Aopensource+AND+NOT+collection%3Aprintdisabled'
33+
'query_base' : 'format%3Aabbyy+AND+format%3Ascandata+AND+format%3Apdf+AND+NOT+ocr%3A%22language+not%22+AND+NOT+collection%3Alendinglibrary+AND+NOT+collection%3Aopensource+AND+NOT+collection%3Aprintdisabled+AND+NOT+collection%3Arosettaproject'
3434
}
3535

3636
urls = (
@@ -44,7 +44,7 @@
4444
'/search(.*)', 'htmlsearch',
4545
'/crawlable(?:/(.*))?(|.html)', 'crawlable',
4646
'/(|index.html)', 'index',
47-
'/(.*)', 'indexRedirect',
47+
'/(.*)', 'indexRedirect',
4848
)
4949

5050
application = web.application(urls, globals()).wsgifunc()
@@ -57,17 +57,17 @@ def getDateString():
5757
#IA is continuously scanning books. Since this OPDS file is constructed
5858
#from search engine results, let's change the updated date every midnight
5959
t = time.gmtime()
60-
datestr = time.strftime('%Y-%m-%dT%H:%M:%SZ',
60+
datestr = time.strftime('%Y-%m-%dT%H:%M:%SZ',
6161
(t.tm_year, t.tm_mon, t.tm_mday, 0, 0, 0, 0, 0, 0))
6262
return datestr
63-
63+
6464
def getEnv(key, default = None):
    """Return one entry of the current request's environ mapping.

    key     -- name of the environ entry to look up (e.g. 'HTTP_USER_AGENT')
    default -- value returned when `key` is not present (None if omitted)
    """
    # A single dict.get() replaces the has_key()-then-index pair: has_key()
    # is deprecated and no longer exists on Python 3 dicts, and get() yields
    # the same result with one lookup.
    return web.ctx['environ'].get(key, default)
70-
70+
7171
def getDevice():
7272
userAgent = getEnv('HTTP_USER_AGENT')
7373
if userAgent is not None:
@@ -85,7 +85,7 @@ def GET(self, url):
8585
mode = 'html'
8686

8787
datestr = getDateString()
88-
88+
8989
c = catalog.Catalog(
9090
title = 'Internet Archive Catalog',
9191
urn = pubInfo['urnroot'],
@@ -108,46 +108,46 @@ def GET(self, url):
108108
'new': 'new'
109109
}
110110
type = 'application/atom+xml'
111-
111+
112112
l = catalog.Link(url = links['alpha'], type = type)
113113
e = catalog.Entry({'title' : 'Alphabetical By Title',
114114
'urn' : pubInfo['urnroot'] + ':titles:all',
115115
'updated' : datestr,
116116
'content' : 'Alphabetical list of all titles.'
117117
}, links=(l,))
118118
c.addEntry(e)
119-
119+
120120
l = catalog.Link(url = links['downloads'], type = type)
121121
e = catalog.Entry({'title' : 'Most Downloaded Books',
122122
'urn' : pubInfo['urnroot'] + ':downloads',
123123
'updated' : datestr,
124124
'content' : 'The most downloaded books from the Internet Archive in the last month.'
125125
}, links=(l,))
126-
126+
127127
c.addEntry(e)
128128

129-
l = catalog.Link(url = links['new'], type = type)
129+
l = catalog.Link(url = links['new'], type = type)
130130
e = catalog.Entry({'title' : 'Recent Scans',
131131
'urn' : pubInfo['urnroot'] + ':new',
132132
'updated' : datestr,
133133
'content' : 'Books most recently scanned by the Internet Archive.'
134134
}, links=(l,))
135-
135+
136136
c.addEntry(e)
137-
137+
138138
osDescriptionDoc = 'http://bookserver.archive.org/catalog/opensearch.xml'
139139
o = catalog.OpenSearch(osDescriptionDoc)
140140
c.addOpenSearch(o)
141-
141+
142142
if url and url.endswith('.html'):
143143
r = output.ArchiveCatalogToHtml(c, device = getDevice())
144144
web.header('Content-Type', 'text/html')
145145
return r.toString()
146-
else:
146+
else:
147147
r = output.CatalogToAtom(c)
148148
web.header('Content-Type', pubInfo['mimetype'])
149149
return r.toString()
150-
150+
151151

152152
# /alpha/a/0
153153
#______________________________________________________________________________
@@ -162,7 +162,7 @@ def GET(self, letter, start):
162162
start = start[:-5]
163163
mode = 'html'
164164
start = int(start)
165-
165+
166166
solrUrl = pubInfo['solr_base']+'&q='+pubInfo['query_base']+'+AND+firstTitle%3A'+letter.upper()+'&sort=titleSorter+asc&rows='+str(numRows)+'&start='+str(start*numRows)
167167
titleFragment = 'books starting with "%s"' % (letter.upper())
168168
urn = pubInfo['urnroot'] + ':%s:%d'%(letter, start)
@@ -172,7 +172,7 @@ def GET(self, letter, start):
172172
urlBase='/catalog/alpha/%s/' % (letter),
173173
titleFragment = titleFragment)
174174
c = ingestor.getCatalog()
175-
175+
176176
if 'html' == mode:
177177
web.header('Content-Type', 'text/html')
178178
r = output.ArchiveCatalogToHtml(c, device = getDevice())
@@ -181,7 +181,7 @@ def GET(self, letter, start):
181181
web.header('Content-Type', pubInfo['mimetype'])
182182
r = output.CatalogToAtom(c, fabricateContentElement=True)
183183
return r.toString()
184-
184+
185185
# /alpha.xml
186186
#______________________________________________________________________________
187187
class alphaList:
@@ -197,7 +197,7 @@ def GET(self, extension):
197197
#TODO: create a version of /alpha.xml with the correct updated dates,
198198
#and cache it for an hour to ease load on solr
199199
datestr = getDateString()
200-
200+
201201
c = catalog.Catalog(
202202
title = 'Internet Archive - All Titles',
203203
urn = pubInfo['urnroot'] + ':titles:all',
@@ -217,8 +217,8 @@ def GET(self, extension):
217217
linkType = 'application/atom+xml'
218218
else:
219219
raise ValueError('Unsupported extension %s' % extension)
220-
221-
l = catalog.Link(url = self.alphaURL(extension, lower, 0), type = linkType)
220+
221+
l = catalog.Link(url = self.alphaURL(extension, lower, 0), type = linkType)
222222
e = catalog.Entry({'title' : 'Titles: ' + letter,
223223
'urn' : pubInfo['urnroot'] + ':titles:'+lower,
224224
'updated' : datestr,
@@ -229,7 +229,7 @@ def GET(self, extension):
229229
osDescriptionDoc = 'http://bookserver.archive.org/catalog/opensearch.xml'
230230
o = catalog.OpenSearch(osDescriptionDoc)
231231
c.addOpenSearch(o)
232-
232+
233233
if ('xml' == extension):
234234
web.header('Content-Type', pubInfo['mimetype'])
235235
r = output.CatalogToAtom(c)
@@ -249,7 +249,7 @@ def GET(self, extension):
249249
urn = pubInfo['urnroot'] + ':downloads'
250250
ingestor = catalog.ingest.IASolrToCatalog(pubInfo, solrUrl, urn, titleFragment=titleFragment)
251251
c = ingestor.getCatalog()
252-
252+
253253
if ('xml' == extension):
254254
web.header('Content-Type', pubInfo['mimetype'])
255255
r = output.CatalogToAtom(c, fabricateContentElement=True)
@@ -269,16 +269,16 @@ def GET(self, start, extension):
269269
extension = 'html'
270270
else:
271271
extension = 'xml'
272-
272+
273273
if not start:
274274
start = 0
275275
else:
276276
if start.endswith('.html'):
277277
extension = 'html'
278278
start = start[:-5]
279279
start = int(start)
280-
281-
280+
281+
282282
solrUrl = pubInfo['solr_base'] + '&q='+pubInfo['query_base']+'&sort=publicdate+desc&rows='+str(numRows)+'&start='+str(start*numRows)
283283
titleFragment = 'books sorted by update date'
284284
urn = pubInfo['urnroot'] + ':new:%d' % (start)
@@ -287,7 +287,7 @@ def GET(self, start, extension):
287287
urlBase='/catalog/new/',
288288
titleFragment = titleFragment)
289289
c = ingestor.getCatalog()
290-
290+
291291
if 'html' == extension:
292292
web.header('Content-Type', 'text/html')
293293
r = output.ArchiveCatalogToHtml(c, device = getDevice())
@@ -305,15 +305,15 @@ def GET(self, start, extension):
305305
extension = 'html'
306306
else:
307307
extension = 'xml'
308-
308+
309309
if not start:
310310
start = 0
311311
else:
312312
if start.endswith('.html'):
313313
extension = 'html'
314314
start = start[:-5]
315315
start = int(start)
316-
316+
317317
crawlNumRows = 1000;
318318
solrUrl = pubInfo['solr_base'] + '&q='+pubInfo['query_base']+'&rows='+str(crawlNumRows)+'&start='+str(start*crawlNumRows)
319319
titleFragment = '- crawlable feed'
@@ -323,7 +323,7 @@ def GET(self, start, extension):
323323
urlBase='/catalog/crawlable/',
324324
titleFragment = titleFragment)
325325
c = ingestor.getCatalog()
326-
326+
327327
if 'html' == extension:
328328
web.header('Content-Type', 'text/html')
329329
r = output.ArchiveCatalogToHtml(c, device = getDevice())
@@ -335,7 +335,7 @@ def GET(self, start, extension):
335335

336336

337337
# /opensearch
338-
#______________________________________________________________________________
338+
#______________________________________________________________________________
339339
class opensearch:
340340
def GET(self, query):
341341
params = cgi.parse_qs(web.ctx.query)
@@ -346,7 +346,7 @@ def GET(self, query):
346346
start = int(params['start'][0])
347347

348348
q = params['?q'][0]
349-
qq = urllib.quote(q)
349+
qq = urllib.quote(q)
350350
solrUrl = pubInfo['solr_base'] + '&q='+qq+'+AND+'+pubInfo['query_base']+'&sort=month+desc&rows='+str(numRows)+'&start='+str(start*numRows)
351351
titleFragment = 'search results for ' + q
352352
urn = pubInfo['urnroot'] + ':search:%s:%d' % (qq, start)
@@ -361,15 +361,15 @@ def GET(self, query):
361361
web.header('Content-Type', pubInfo['mimetype'])
362362
r = output.CatalogToAtom(c, fabricateContentElement=True)
363363
return r.toString()
364-
364+
365365
# /search
366-
#______________________________________________________________________________
366+
#______________________________________________________________________________
367367
class htmlsearch:
368368
def GET(self, query):
369369
qs = web.ctx.query
370370
if qs.startswith('?'):
371371
qs = qs[1:]
372-
372+
373373
params = cgi.parse_qs(qs)
374374

375375
if not 'start' in params:
@@ -382,7 +382,7 @@ def GET(self, query):
382382

383383
q = params['q'][0]
384384
qq = urllib.quote(q)
385-
solrUrl = 'http://se.us.archive.org:8983/solr/select?q='+qq+'+AND+'+pubInfo['query_base']+'&fl=identifier,title,creator,oai_updatedate,date,contributor,publisher,subject,language,format&rows='+str(numRows)+'&start='+str(start*numRows)+'&wt=json'
385+
solrUrl = 'http://se.us.archive.org:8983/solr/select?q='+qq+'+AND+'+pubInfo['query_base']+'&fl=identifier,title,creator,oai_updatedate,date,contributor,publisher,subject,language,format&rows='+str(numRows)+'&start='+str(start*numRows)+'&wt=json'
386386
titleFragment = 'search results for ' + q
387387
urn = pubInfo['urnroot'] + ':search:%s:%d' % (qq, start)
388388

@@ -392,40 +392,40 @@ def GET(self, query):
392392
titleFragment = titleFragment)
393393

394394
c = ingestor.getCatalog()
395-
395+
396396
web.header('Content-Type', 'text/html')
397397
r = output.ArchiveCatalogToHtml(c, device = getDevice())
398398
return r.toString()
399399

400400
# /opensearch.xml - Open Search Description
401-
#______________________________________________________________________________
401+
#______________________________________________________________________________
402402
class openSearchDescription:
403403
def GET(self):
404404
web.header('Content-Type', 'application/atom+xml')
405405
return """<?xml version="1.0" encoding="UTF-8"?>
406406
<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/">
407407
<ShortName>Internet Archive Search</ShortName>
408408
<Description>Search archive.org's OPDS Catalog.</Description>
409-
<Url type="application/atom+xml"
409+
<Url type="application/atom+xml"
410410
template="%s/opensearch?q={searchTerms}&amp;start={startPage?}"/>
411411
</OpenSearchDescription>""" % (pubInfo['opdsroot'])
412412

413413

414414
# redirect to remove trailing slash
415-
#______________________________________________________________________________
415+
#______________________________________________________________________________
416416
class redirect:
    """Handler that redirects the client to '/' + path."""

    def GET(self, path):
        """Issue a "see other" redirect to '/' followed by `path`.

        path -- the portion of the URL captured by the route
                (presumably the URL without its trailing slash; verify
                against the route table in `urls`)
        """
        # web.seeother is an exception class in the web.application API, so
        # it must be raised; merely constructing it leaves the handler
        # returning None and relies on constructor side effects.
        raise web.seeother('/' + path)
419419

420420
# redirect to index
421-
#______________________________________________________________________________
421+
#______________________________________________________________________________
422422
class indexRedirect:
    """Handler for the catch-all '/(.*)' route: send the client to the index."""

    def GET(self, path):
        """Issue a "see other" redirect to '/'.

        path -- the unmatched URL captured by the route; it is ignored
        """
        # web.seeother is an exception class in the web.application API, so
        # it must be raised; merely constructing it leaves the handler
        # returning None and relies on constructor side effects.
        raise web.seeother('/')
425425

426-
426+
427427
# main() - standalone mode
428-
#______________________________________________________________________________
428+
#______________________________________________________________________________
429429
if __name__ == "__main__":
430430
#run in standalone mode
431431
app = web.application(urls, globals())

0 commit comments

Comments
 (0)