Source code for dhlab.api.nb_ngram_api
import json
import networkx as nx
import requests
from dhlab.constants import GALAXY_API, NGRAM_API
[docs]
def get_ngram(terms: str, corpus: str = "avis", lang: str = "nob") -> dict:
"""Fetch raw and relative frequencies for the ``terms``.
Call the :py:data:`NGRAM_API`.
The frequencies are aggregated per year between 1800-2021.
:param str terms: comma separated string of words
:param str corpus: type of documents to search through
:return: table of annual frequency counts per term
"""
req = requests.get(
NGRAM_API,
params={
'terms': terms,
'corpus': corpus,
'lang':lang
}
)
if req.status_code == 200:
res = req.text
else:
res = "[]"
return json.loads(res)
[docs]
def make_word_graph(
words: str, corpus: str = 'all', cutoff: int = 16, leaves: int = 0
) -> nx.DiGraph:
"""Get galaxy from ngram-database.
Call the :py:obj:`~dhlab.constants.GALAXY_API` endpoint.
:param str words: comma-separated string of words
:param str corpus: document type: ``'book'``, ``'avis'``, or ``'all'``,
:param int cutoff: Number of nodes to include.
:param int leaves: Set leaves=1 to get the leaves.
:return: A `networkx.DiGraph` with the results.
"""
params = dict()
params['terms'] = words
params['corpus'] = corpus
params['limit'] = cutoff
params['leaves'] = leaves
result = requests.get(GALAXY_API, params=params)
G = nx.DiGraph()
edgelist = []
if result.status_code == 200:
graph = json.loads(result.text)
# print(graph)
nodes = graph['nodes']
edges = graph['links']
for edge in edges:
edgelist += [
(nodes[edge['source']]['name'],
nodes[edge['target']]['name'],
abs(edge['value']))
]
# print(edgelist)
G.add_weighted_edges_from(edgelist)
return G