new_frontera
stable
new_frontera at a glance
Run modes
Quick start single process
Quick start distributed mode
Cluster setup guide
Installation Guide
Crawling strategies
Frontier objects
Middlewares
Canonical URL Solver
Backends
Message bus
Writing custom crawling strategy
Using the Frontier with Scrapy
Settings
What is a Crawl Frontier?
Graph Manager
Recording a Scrapy crawl
Fine tuning of new_frontera cluster
DNS Service
Architecture overview
new_frontera API
Using the Frontier with Requests
Examples
Tests
Logging
Testing a Frontier
Contribution guidelines
Glossary
new_frontera
Index
Edit on GitHub
Index
_
|
B
|
C
|
D
|
E
|
F
|
G
|
H
|
I
|
L
|
M
|
N
|
P
|
Q
|
R
|
S
|
U
_
__contains__() (new_frontera.core.components.DomainMetadata method)
__delitem__() (new_frontera.core.components.DomainMetadata method)
__getitem__() (new_frontera.core.components.DomainMetadata method)
__setitem__() (new_frontera.core.components.DomainMetadata method)
B
Backend (class in new_frontera.core.components)
BaseCrawlingStrategy (class in new_frontera.strategy)
BaseDecoder (class in new_frontera.core.codec)
BaseEncoder (class in new_frontera.core.codec)
BasicCanonicalSolver (class in new_frontera.contrib.canonicalsolvers.basic)
body (new_frontera.core.models.Request property)
(new_frontera.core.models.Response property)
C
close() (new_frontera.strategy.BaseCrawlingStrategy method)
Component (class in new_frontera.core.components)
cookies (new_frontera.core.models.Request property)
count() (new_frontera.core.components.Queue method)
crawling strategy
CrawlPage (built-in class)
create_request() (new_frontera.strategy.BaseCrawlingStrategy method)
D
db worker
db_worker() (new_frontera.core.components.DistributedBackend class method)
decode() (new_frontera.core.codec.BaseDecoder method)
decode_request() (new_frontera.core.codec.BaseDecoder method)
DistributedBackend (class in new_frontera.core.components)
DomainFingerprintMiddleware (class in new_frontera.contrib.middlewares.fingerprint)
DomainMetadata (class in new_frontera.core.components)
DomainMiddleware (class in new_frontera.contrib.middlewares.domain)
E
encode_new_job_id() (new_frontera.core.codec.BaseEncoder method)
encode_offset() (new_frontera.core.codec.BaseEncoder method)
encode_page_crawled() (new_frontera.core.codec.BaseEncoder method)
encode_request() (new_frontera.core.codec.BaseEncoder method)
encode_request_error() (new_frontera.core.codec.BaseEncoder method)
encode_update_score() (new_frontera.core.codec.BaseEncoder method)
F
fetch() (new_frontera.core.components.States method)
filter_extracted_links() (new_frontera.strategy.BaseCrawlingStrategy method)
finished() (new_frontera.core.components.Backend method)
(new_frontera.strategy.BaseCrawlingStrategy method)
flush() (new_frontera.core.components.States method)
from_manager() (new_frontera.core.components.Backend class method)
(new_frontera.core.components.Component class method)
(new_frontera.core.components.Middleware class method)
from_worker() (new_frontera.strategy.BaseCrawlingStrategy class method)
frontier_start() (new_frontera.core.components.Backend method)
(new_frontera.core.components.Component method)
(new_frontera.core.components.Middleware method)
frontier_stop() (new_frontera.core.components.Backend method)
(new_frontera.core.components.Component method)
(new_frontera.core.components.Middleware method)
G
get_next_requests() (new_frontera.core.components.Backend method)
(new_frontera.core.components.Queue method)
H
headers (new_frontera.core.models.Request property)
(new_frontera.core.models.Response property)
hostname_local_fingerprint() (in module new_frontera.utils.fingerprint)
I
id (CrawlPage attribute)
is_seed (CrawlPage attribute)
L
links (CrawlPage attribute)
links_extracted() (new_frontera.strategy.BaseCrawlingStrategy method)
M
MemoryDistributedBackend (class in new_frontera.contrib.backends.memory)
message bus
MessageBusBackend (class in new_frontera.contrib.backends.remote.messagebus)
meta (new_frontera.core.models.Request property)
(new_frontera.core.models.Response property)
Metadata (class in new_frontera.core.components)
metadata (new_frontera.core.components.Backend attribute)
method (new_frontera.core.models.Request property)
Middleware (class in new_frontera.core.components)
module
new_frontera.contrib.backends.remote.codecs.json
N
name (new_frontera.core.components.Component attribute)
new_frontera.contrib.backends.remote.codecs.json
module
P
page_crawled() (new_frontera.core.components.Backend method)
(new_frontera.core.components.Component method)
(new_frontera.core.components.Metadata method)
(new_frontera.core.components.Middleware method)
(new_frontera.strategy.BaseCrawlingStrategy method)
Q
Queue (class in new_frontera.core.components)
queue (new_frontera.core.components.Backend attribute)
R
read_seeds() (new_frontera.strategy.BaseCrawlingStrategy method)
referers (CrawlPage attribute)
refresh_states() (new_frontera.strategy.BaseCrawlingStrategy method)
Request (class in new_frontera.core.models)
request (new_frontera.core.models.Response property)
request_error() (new_frontera.core.components.Backend method)
(new_frontera.core.components.Component method)
(new_frontera.core.components.Metadata method)
(new_frontera.core.components.Middleware method)
(new_frontera.strategy.BaseCrawlingStrategy method)
Response (class in new_frontera.core.models)
S
schedule() (new_frontera.core.components.Queue method)
(new_frontera.strategy.BaseCrawlingStrategy method)
scoring log
set_states() (new_frontera.core.components.States method)
Settings (class in new_frontera.settings)
spider
spider feed
spider log
state cache
States (class in new_frontera.core.components)
states (new_frontera.core.components.Backend attribute)
status (CrawlPage attribute)
status_code (new_frontera.core.models.Response property)
strategy worker
strategy_worker() (new_frontera.core.components.DistributedBackend class method)
U
update_cache() (new_frontera.core.components.States method)
url (CrawlPage attribute)
(new_frontera.core.models.Request property)
(new_frontera.core.models.Response property)
UrlFingerprintMiddleware (class in new_frontera.contrib.middlewares.fingerprint)