-
Notifications
You must be signed in to change notification settings - Fork 0
/
textfiles.py
53 lines (34 loc) · 1.68 KB
/
textfiles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from conjure import text_conjure, json_conjure, LmdbCollection, conjure, JSONSerializer, JSONDeserializer
from conjure.identifier import FunctionContentIdentifier, ParamsHash
# Module-level storage handle: an LMDB-backed collection named 'textfiles'.
# NOTE(review): LmdbCollection presumably opens/creates an on-disk LMDB
# environment at import time — confirm against the conjure library.
collection = LmdbCollection('textfiles')

# Configure a conjure wrapper that caches results in `collection`.
# Calls are keyed by function content (FunctionContentIdentifier) plus a
# hash of the arguments (ParamsHash); cached values are stored/loaded as
# JSON with a text/plain content type.
# NOTE(review): exact keying/caching semantics come from the conjure
# package and are assumed from the parameter names — verify upstream.
custom = conjure(
    content_type='text/plain',
    storage=collection,
    func_identifier=FunctionContentIdentifier(),
    param_identifier=ParamsHash(),
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer()
)
def textfile_index(path: str, content: bytes):
    """
    Yield one ``(word, {'title': path})`` pair per distinct word in *content*.

    Produces an inverted-index stream: the raw bytes are decoded to text,
    split on whitespace, lower-cased, and de-duplicated; each unique token
    is emitted once alongside a record pointing back at the source document.

    :param path: identifier of the source text file; stored as the index
        record's ``title`` value.
    :param content: raw file bytes. Decoded as UTF-8 with invalid bytes
        replaced (``errors='replace'``) so legacy non-UTF-8 files cannot
        crash the indexer; valid UTF-8 input decodes identically.
    :returns: iterator of ``(word, dict)`` tuples; empty for empty input.

    TODOs (design notes):
    - this key should be func_INDEXVALUE_paramshash, to allow for
      prefix = func_INDEXVALUE queries and to ensure that identical values
      are not overwritten. For a single call to the index function, func
      and paramshash would remain constant, while INDEXVALUE would vary.
    - index conjure functions should be able to return multiple values,
      i.e., they are iterators.
    - What if, instead of trying to re-use conjure, indexes are truly
      something different? They are expected to return an iterator of
      (key, value) pairs where keys are bytes and values are json,
      required to have a special key property that points to the main
      collection? The big difference that's keeping me from using conjure
      as-is is the fact that I need _results_ to compute a key for
      indexes. For conjure, if I need the result to compute the key, that
      defeats the whole purpose!
    """
    # errors='replace' guards against legacy encodings (cp437/latin-1 are
    # common on old text archives); it is a no-op for valid UTF-8.
    text = content.decode(errors='replace')
    # split() on whitespace already trims each token, so no strip() is
    # needed; the set comprehension de-duplicates case-folded words.
    words = {token.lower() for token in text.split()}
    for word in words:
        yield word, {'title': path}
# @text_conjure(collection)
def get_textfile(path):
    """Fetch a document from textfiles.com and return its raw body bytes."""
    import requests

    url = f'http://textfiles.com/{path}'
    response = requests.get(url)
    return response.content