Найти в Дзене

Парсинг ключевых слов в keys.so по api

python3 -m pip install requests import requests import csv import os import time API_URL = "https://api.keys.so/report/system_keywords" TOKEN = "YOUR_TOKEN" HEADERS = { "Content-Type": "application/json", "Accept": "application/json", "X-Keyso-TOKEN": TOKEN, } QUERY_PARAMS = { "strict": "true", "hideadult": "true", "sort": "string", } FILTER = "weight>=100^ws>=10000^avbid<=1^adscnt<=1" PER_PAGE = 500 STATE_FILE = "page_state.txt" CSV_FILE = "system_keywords.csv" # ---------- state ---------- def load_page(): if os.path.exists(STATE_FILE): with open(STATE_FILE, "r") as f: return int(f.read().strip()) return 1 def save_page(page): with open(STATE_FILE, "w") as f: f.write(str(page)) # ---------- csv ---------- def append_to_csv(rows): if not rows: return file_exists = os.path.exists(CSV_FILE) with open(CSV_FILE, "a", newline="", encoding="utf-8") as f: writer = csv.DictWriter(f, fieldnames=rows[0].keys()) if not file_exists: writer.writeheader() writer.writerows(rows) # ---------- main
Оглавление

Чтобы скачать базу ключевых запросов и увидеть ее в таком виде
Чтобы скачать базу ключевых запросов и увидеть ее в таком виде

Подготавливаем python

python3 -m pip install requests

Скрипт python

import requests
import csv
import os
import time
API_URL = "https://api.keys.so/report/system_keywords"
TOKEN = "YOUR_TOKEN"
HEADERS = {
"Content-Type": "application/json",
"Accept": "application/json",
"X-Keyso-TOKEN": TOKEN,
}
QUERY_PARAMS = {
"strict": "true",
"hideadult": "true",
"sort": "string",
}
FILTER = "weight>=100^ws>=10000^avbid<=1^adscnt<=1"
PER_PAGE = 500
STATE_FILE = "page_state.txt"
CSV_FILE = "system_keywords.csv"
# ---------- state ----------
def load_page():
if os.path.exists(STATE_FILE):
with open(STATE_FILE, "r") as f:
return int(f.read().strip())
return 1
def save_page(page):
with open(STATE_FILE, "w") as f:
f.write(str(page))
# ---------- csv ----------
def append_to_csv(rows):
if not rows:
return
file_exists = os.path.exists(CSV_FILE)
with open(CSV_FILE, "a", newline="", encoding="utf-8") as f:
writer = csv.DictWriter(f, fieldnames=rows[0].keys())
if not file_exists:
writer.writeheader()
writer.writerows(rows)
# ---------- main ----------
def main():
page = load_page()
print(f"▶ Старт с страницы: {page}")
while True:
print(f"Запрос страницы {page}")
payload = {
"params": {
"filter": FILTER,
"per_page": PER_PAGE,
"page": page,
"sort": "wsk|desk",
}
}
response = requests.post(
API_URL,
headers=HEADERS,
params=QUERY_PARAMS,
json=payload,
timeout=60,
)
response.raise_for_status()
result = response.json()
data = result.get("data", [])
if not data:
print("✅ Данные закончились")
break
append_to_csv(data)
save_page(page + 1)
print(f"✔ Сохранено строк: {len(data)}")
page += 1
if len(data) < PER_PAGE:
print("✅ Последняя страница")
break
time.sleep(0.3) # защита от rate limit
if __name__ == "__main__":
main()

Структура для импорта ключевых слов

create table public.system_keywords
(
id integer,
word varchar(1000),
ws integer,
wsk integer,
superwsk integer,
docs integer,
numwords integer,
adscnt integer,
isgeo integer,
isquest integer,
updated_at varchar(10),
updated_at_date date
);
alter table public.system_keywords
owner to app_user;

Запрос для конвертации в дату

ALTER TABLE system_keywords
ALTER COLUMN updated_at TYPE date
USING to_date(updated_at, 'DD.MM.YYYY');

Или

UPDATE system_keywords
SET updated_at_date = to_date(updated_at, 'DD.MM.YYYY')
WHERE updated_at_date is null;