Source code for openwpm.utilities.db_utils
import sqlite3
from collections.abc import Iterable
from pathlib import Path
from typing import Any, AnyStr, Iterator, List, Optional, Tuple, Union
import plyvel
[docs]
def query_db(
    db: Path, query: str, params: Any = None, as_tuple: bool = False
) -> List[Union[sqlite3.Row, Tuple[Any, ...]]]:
    """Run a query against the given db and return every resulting row.

    If params is not None, securely construct a query from the given
    query string and params (they are bound by sqlite3, never
    interpolated into the SQL text).

    Parameters
    ----------
    db : Path
        Path of the SQLite database file to open.
    query : str
        SQL statement to execute.
    params : Any, optional
        Values bound to the placeholders in ``query``. ``None`` runs the
        statement without any bound parameters.
    as_tuple : bool, optional
        When False (the default) rows are ``sqlite3.Row`` objects; when
        True the connection's default row type (plain tuples) is kept.

    Returns
    -------
    list
        All rows produced by the statement, as returned by ``fetchall()``.
    """
    con = sqlite3.connect(db)
    try:
        if not as_tuple:
            con.row_factory = sqlite3.Row
        # ``with con`` only scopes a transaction (commit on success,
        # rollback on error); it does NOT close the connection, hence the
        # explicit close() in the ``finally`` clause below.
        with con:
            if params is None:
                rows = con.execute(query).fetchall()
            else:
                rows = con.execute(query, params).fetchall()
    finally:
        con.close()
    return rows
[docs]
def get_content(db_name: Path) -> Iterator[Tuple[AnyStr, AnyStr]]:
    """Yield key, value pairs from the deduplicated leveldb content database

    The database is opened when iteration starts and is closed when the
    generator finishes, is closed early by the consumer, or is interrupted
    by an exception.

    Parameters
    ----------
    db_name : Path
        The full path to the current db

    Yields
    ------
    tuple
        ``(content_hash, content)`` pairs as produced by the database
        iterator.
    """
    db = plyvel.DB(str(db_name), create_if_missing=False, compression="snappy")
    try:
        for content_hash, content in db.iterator():
            yield content_hash, content
    finally:
        # Runs on exhaustion, on generator.close() / garbage collection of a
        # partially consumed generator, and on errors, so the handle is
        # never left open.
        db.close()
[docs]
def get_javascript_entries(
    db: Path, all_columns: bool = False, as_tuple: bool = False
) -> List[Union[Tuple[Any, ...], sqlite3.Row]]:
    """Return the rows of the ``javascript`` table of the given db.

    Parameters
    ----------
    db : Path
        Path of the SQLite database to query.
    all_columns : bool, optional
        When True every column is selected; otherwise only script_url,
        symbol, operation, value and arguments.
    as_tuple : bool, optional
        Forwarded to ``query_db`` to choose the row type.
    """
    # Only these two fixed literals are ever interpolated into the SQL text.
    select_columns = (
        "*" if all_columns else "script_url, symbol, operation, value, arguments"
    )
    statement = f"SELECT {select_columns} FROM javascript"
    return query_db(db, statement, as_tuple=as_tuple)
[docs]
def any_command_failed(db: Path) -> bool:
    """Report whether the crawl history of the given db contains a failure.

    Returns True as soon as a row of ``crawl_history`` has a
    ``command_status`` other than "ok", and False when no such row exists
    (including when the table is empty).
    """
    history = query_db(db, "SELECT * FROM crawl_history;")
    for entry in history:
        # query_db is called with its default as_tuple=False, so rows are
        # sqlite3.Row; the assert narrows the Union type for name lookup.
        assert isinstance(entry, sqlite3.Row)
        status = entry["command_status"]
        if status != "ok":
            return True
    return False