Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def interest_over_time(self):
"""Request data from Google's Interest Over Time section and return a dataframe"""
# Query parameters are taken from the widget stored on
# self.interest_over_time_widget (its "request" and "token" entries)
# together with the instance's tz value.
over_time_payload = {
# convert to string as requests will mangle
"req": json.dumps(self.interest_over_time_widget["request"]),
"token": self.interest_over_time_widget["token"],
"tz": self.tz,
}
# make the request and parse the returned json
req_json = self._get_data(
url=TrendReq.INTEREST_OVER_TIME_URL,
method=TrendReq.GET_METHOD,
trim_chars=5,
params=over_time_payload,
)
# One row per entry of the response's default -> timelineData list.
df = pd.DataFrame(req_json["default"]["timelineData"])
# Nothing came back: hand the empty frame to the caller unchanged.
if df.empty:
return df
# The "time" column is cast to float64 and read as seconds since the epoch
# (unit="s") to produce the "date" column.
df["date"] = pd.to_datetime(df["time"].astype(dtype="float64"), unit="s")
# Index by date and order the rows chronologically.
df = df.set_index(["date"]).sort_index()
# split list columns into separate ones, remove brackets and split on comma
# (each "value" entry goes through str(), so the resulting cells are strings)
result_df = df["value"].apply(
lambda x: pd.Series(str(x).replace("[", "").replace("]", "").split(","))
)
# rename each column with its search term, relying on order that google provides...
# NOTE(review): the remainder of this method is not visible in this excerpt.
def suggestions(self, keyword):
    """Fetch Google's keyword-suggestion topics for ``keyword``.

    The keyword is URL-quoted and appended to ``TrendReq.SUGGESTIONS_URL``;
    the request goes through ``self._get_data`` with ``self.hl`` sent as
    the ``hl`` query parameter.

    Returns:
        Whatever the parsed response holds under ``["default"]["topics"]``.
    """
    quoted_keyword = quote(keyword)
    query = {"hl": self.hl}
    response = self._get_data(
        url=TrendReq.SUGGESTIONS_URL + quoted_keyword,
        params=query,
        method=TrendReq.GET_METHOD,
        trim_chars=5,
    )
    # Only the topic entries of the response are handed back.
    topics = response["default"]["topics"]
    return topics
def _tokens(self):
"""Makes request to Google to get API tokens for interest over time, interest by region and related queries"""
# make the request and parse the returned json
# (only the "widgets" entry of the response is kept)
widget_dict = self._get_data(
url=TrendReq.GENERAL_URL,
method=TrendReq.GET_METHOD,
params=self.token_payload,
trim_chars=4,
)["widgets"]
# order of the json matters...
# Flag consulted below so that a GEO_MAP widget is only stored while it is
# still true.
# NOTE(review): the code that resets this flag is not visible in this
# excerpt - confirm it is cleared after the first GEO_MAP widget.
first_region_token = True
# clear self.related_queries_widget_list and self.related_topics_widget_list
# of old keywords' widgets
# (slice assignment empties the existing list objects in place rather than
# rebinding the attributes to new lists)
self.related_queries_widget_list[:] = []
self.related_topics_widget_list[:] = []
# assign requests
for widget in widget_dict:
# The TIMESERIES widget is stored for interest_over_time requests.
if widget["id"] == "TIMESERIES":
self.interest_over_time_widget = widget
# A GEO_MAP widget is stored for interest-by-region requests.
if widget["id"] == "GEO_MAP" and first_region_token:
self.interest_by_region_widget = widget
def categories(self):
    """Fetch the category listing served at ``TrendReq.CATEGORIES_URL``.

    The request goes through ``self._get_data`` with ``self.hl`` sent as
    the ``hl`` query parameter.

    Returns:
        The parsed response exactly as ``self._get_data`` produced it.
    """
    query = {"hl": self.hl}
    return self._get_data(
        url=TrendReq.CATEGORIES_URL,
        params=query,
        method=TrendReq.GET_METHOD,
        trim_chars=5,
    )
def trending_searches(self, pn="united_states"):
    """Fetch Google's Hot Searches data and return it as a DataFrame.

    Args:
        pn: Key used to pick one entry out of the parsed response.
            Defaults to ``"united_states"``.

    Returns:
        A ``pandas.DataFrame`` built from the response entry stored
        under ``pn``.
    """
    # No query parameters are sent: TRENDING_SEARCHES_URL is requested as-is.
    response = self._get_data(
        url=TrendReq.TRENDING_SEARCHES_URL,
        method=TrendReq.GET_METHOD,
    )
    selected = response[pn]
    return pd.DataFrame(selected)
def today_searches(self, pn="US"):
    """Request Google's Daily Trends data and return one column of it.

    Args:
        pn: Value sent as the ``geo`` request parameter. Defaults to
            ``"US"``.

    Returns:
        A ``pandas.Series``: the last column of a DataFrame that has one
        row per trending search, built from each search's ``"title"``
        entry. Only the first element of ``trendingSearchesDays`` in the
        response is used.
    """
    forms = {"ns": 15, "geo": pn, "tz": "-180", "hl": "en-US"}
    req_json = self._get_data(
        url=TrendReq.TODAY_SEARCHES_URL,
        method=TrendReq.GET_METHOD,
        trim_chars=5,
        params=forms,
    )["default"]["trendingSearchesDays"][0]["trendingSearches"]
    # Build the frame in a single step. The previous row-by-row
    # DataFrame.append() loop was quadratic, and DataFrame.append was
    # removed in pandas 2.0, where it raises AttributeError.
    # Assumes each "title" entry is a dict-like record - TODO confirm
    # against a live response. Column order follows the order of the keys
    # in those records.
    result_df = pd.DataFrame([trend["title"] for trend in req_json])
    return result_df.iloc[:, -1]
def suggestions(self, keyword):
    """Return the suggestion topics Google offers for ``keyword``.

    ``keyword`` is URL-quoted and appended to
    ``TrendReq.SUGGESTIONS_URL``; ``self.hl`` is sent as the ``hl`` query
    parameter and the call is made through ``self._get_data``.

    Returns:
        The ``["default"]["topics"]`` entry of the parsed response.
    """
    encoded = quote(keyword)
    hl_params = {"hl": self.hl}
    payload = self._get_data(
        url=TrendReq.SUGGESTIONS_URL + encoded,
        params=hl_params,
        method=TrendReq.GET_METHOD,
        trim_chars=5,
    )
    return payload["default"]["topics"]
# NOTE(review): this is a body fragment; the enclosing def line and the code
# after the "rising queries" marker are not visible in this excerpt.
# make the request
# related_payload is a single dict reused for every widget: its "req", "token"
# and "tz" entries are overwritten on each loop iteration.
related_payload = dict()
result_dict = dict()
for request_json in self.related_queries_widget_list:
# ensure we know which keyword we are looking at rather than relying on order
# (the keyword is read from the first complexKeywordsRestriction entry of
# the widget's own request)
kw = request_json["request"]["restriction"]["complexKeywordsRestriction"][
"keyword"
][0]["value"]
# convert to string as requests will mangle
related_payload["req"] = json.dumps(request_json["request"])
related_payload["token"] = request_json["token"]
related_payload["tz"] = self.tz
# parse the returned json
req_json = self._get_data(
url=TrendReq.RELATED_QUERIES_URL,
method=TrendReq.GET_METHOD,
trim_chars=5,
params=related_payload,
)
# top queries
# (built from the first rankedList entry, keeping only the "query" and
# "value" columns)
try:
top_df = pd.DataFrame(
req_json["default"]["rankedList"][0]["rankedKeyword"]
)
top_df = top_df[["query", "value"]]
except KeyError:
# in case no top queries are found, the lines above will throw a KeyError
top_df = None
# rising queries
The column f'{word}_unscaled' is the original daily data fetched
month by month, and it is not comparable across different months
(but is comparable within a month).
The column f'{word}_monthly' contains the original monthly data
fetched at once. The values in this column have been backfilled
so that there are no NaN present.
The column 'scale' contains the scale used to obtain the scaled
daily data.
"""
# NOTE(review): this is a body fragment; the enclosing def line is not visible
# and the excerpt is cut off inside the while loop below.
# Set up start and stop dates
# (first day of the start month through the value returned by
# get_last_date_of_month for the stop month)
start_date = date(start_year, start_mon, 1)
stop_date = get_last_date_of_month(stop_year, stop_mon)
# Start pytrends for US region
pytrends = TrendReq(hl='en-US', tz=360)
# Initialize build_payload with the word we need data for
# (partial pre-binds kw_list, cat, geo and gprop; any remaining arguments are
# supplied later by whoever calls build_payload)
build_payload = partial(pytrends.build_payload,
kw_list=[word], cat=0, geo=geo, gprop='')
# Obtain monthly data for all months in years [start_year, stop_year]
# (a single _fetch_data call covering the whole start_date..stop_date range)
monthly = _fetch_data(pytrends, build_payload,
convert_dates_to_timeframe(start_date, stop_date))
# Get daily data, month by month
results = {}
# If a timeout or too-many-requests error occurs, the wait time needs to be
# adjusted.
current = start_date
while current < stop_date:
# Timeframe for this iteration: from current to the last day of current's month.
last_date_of_month = get_last_date_of_month(current.year, current.month)
timeframe = convert_dates_to_timeframe(current, last_date_of_month)
if verbose:
def top_charts(self, date, hl="en-US", tz=300, geo="GLOBAL"):
    """Fetch Google's Top Charts data and return it as a DataFrame.

    Args:
        date: Sent as the ``date`` request parameter.
        hl: Sent as the ``hl`` request parameter. Defaults to ``"en-US"``.
        tz: Sent as the ``tz`` request parameter. Defaults to ``300``.
        geo: Sent as the ``geo`` request parameter. Defaults to
            ``"GLOBAL"``.

    Returns:
        A ``pandas.DataFrame`` built from the ``listItems`` of the first
        ``topCharts`` entry in the parsed response.
    """
    # Request parameters; "isMobile" is always sent as False.
    query = dict(hl=hl, tz=tz, date=date, geo=geo, isMobile=False)
    response = self._get_data(
        url=TrendReq.TOP_CHARTS_URL,
        method=TrendReq.GET_METHOD,
        trim_chars=5,
        params=query,
    )
    # Only the first chart's items are used.
    chart_items = response["topCharts"][0]["listItems"]
    return pd.DataFrame(chart_items)