mirror of
https://github.com/yaroslaff/antifraud2gis.git
synced 2026-04-26 10:41:54 +00:00
sqlite3 search
This commit is contained in:
parent
17bf1235b4
commit
ee4a12e746
14 changed files with 213 additions and 21 deletions
11
contrib/create.sql
Normal file
11
contrib/create.sql
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
-- Company table read and written by src/antifraud2gis/companydb.py.
CREATE TABLE company (
    oid         TEXT PRIMARY KEY,   -- company object id, used for exact lookups
    title       TEXT,
    address     TEXT,
    town        TEXT,
    searchstr   TEXT,               -- matched with LIKE by dbsearch(); stored lowercased by add_company()
    rating_2gis REAL,
    trusted     BOOLEAN,
    nreviews    INTEGER,
    detections  TEXT
);
|
||||
|
|
@ -27,7 +27,8 @@ from ..exceptions import AFReportNotReady, AFNoCompany
|
|||
from ..tasks import submit_fraud_task, get_qsize
|
||||
from ..settings import settings
|
||||
from ..const import REDIS_TASK_QUEUE_NAME, REDIS_TRUSTED_LIST, REDIS_UNTRUSTED_LIST, REDIS_WORKER_STATUS
|
||||
from ..search import search
|
||||
# from ..search import search
|
||||
from ..companydb import dbsearch
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
|
|
@ -217,15 +218,14 @@ async def submit(request: Request, oid: str = Form(...), force: bool = Form(Fals
|
|||
|
||||
|
||||
|
||||
@app.post("/search", response_class=HTMLResponse)
|
||||
async def search_view(request: Request, query: str = Form(...)):
|
||||
# 15 actually
|
||||
@app.get("/search", response_class=HTMLResponse)
|
||||
async def search_view(request: Request, query: str):
|
||||
|
||||
if query.isdigit() and len(query) >= 12:
|
||||
print(f"redirect by id {query!r}")
|
||||
return RedirectResponse(app.url_path_for("report", oid=query), status_code=303)
|
||||
else:
|
||||
print(f"search for {query!r}")
|
||||
results = search(query, limit=25)
|
||||
# results = search(query, limit=25)
|
||||
results = dbsearch(query, limit=25)
|
||||
print(f"got {len(results)} results for {query!r}")
|
||||
|
||||
last_trusted = [json.loads(item) for item in r.lrange(REDIS_TRUSTED_LIST, 0, -1)]
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ from ..const import REDIS_TASK_QUEUE_NAME, REDIS_TRUSTED_LIST, REDIS_UNTRUSTED_L
|
|||
from ..logger import logger
|
||||
from ..session import session
|
||||
from ..utils import random_company
|
||||
from ..companydb import add_company, check_by_oid, get_by_oid, dbsearch
|
||||
|
||||
def countdown(n=5):
|
||||
for i in range(n, 0, -1):
|
||||
|
|
@ -144,7 +145,7 @@ def get_args():
|
|||
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("cmd", choices=['company-users', 'user-reviews', 'queue', 'explore', 'ip'])
|
||||
parser.add_argument("cmd", choices=['company-users', 'user-reviews', 'queue', 'explore', 'ip', 'filldb'])
|
||||
parser.add_argument("-v", "--verbose", default=False, action='store_true')
|
||||
parser.add_argument("--full", default=False, action='store_true')
|
||||
parser.add_argument("args", nargs='*', help='extra args')
|
||||
|
|
@ -214,7 +215,6 @@ def main():
|
|||
|
||||
print(f"Python: {sys.version}")
|
||||
|
||||
|
||||
print(f"HTTPS_PROXY env variable: {os.getenv('HTTPS_PROXY', None)}")
|
||||
r = requests.get("https://ipinfo.io/ip", proxies={"https": None, "http": None})
|
||||
print(f"Direct IP: {r.text}")
|
||||
|
|
@ -244,6 +244,25 @@ def main():
|
|||
print(f"Meta code: {data['meta']['code']}, rating:{data['meta']['branch_rating']} count: {data['meta']['branch_reviews_count']}/{data['meta']['total_count']}")
|
||||
print(f"Reviews: {len(data['reviews'])}")
|
||||
|
||||
elif cmd == "filldb":
|
||||
inserted = 0
|
||||
exist = 0
|
||||
skipped = 0
|
||||
for c in cl.companies(oid=args.company, name=args.name, town=args.town, report=args.report, noreport=args.noreport):
|
||||
if skipped < 1210:
|
||||
skipped += 1
|
||||
continue
|
||||
|
||||
|
||||
if check_by_oid(c.object_id):
|
||||
exist += 1
|
||||
else:
|
||||
inserted += 1
|
||||
print(f"{inserted} add {c.object_id} {c.title}")
|
||||
add_company(c.export())
|
||||
print(f"Done. Inserted {inserted} records, {exist} already exists.")
|
||||
|
||||
|
||||
elif cmd == "explore":
|
||||
|
||||
if args.town is None:
|
||||
|
|
|
|||
|
|
@ -28,7 +28,8 @@ from ..exceptions import AFNoCompany, AFReportNotReady, AFReportAlreadyExists
|
|||
from ..settings import settings
|
||||
from ..statistics import statistics
|
||||
from ..aliases import resolve_alias
|
||||
from ..search import search
|
||||
# from ..search import search
|
||||
from ..companydb import dbsearch
|
||||
|
||||
# CLI
|
||||
from .summary import printsummary
|
||||
|
|
@ -125,7 +126,7 @@ def main():
|
|||
print(f"{len(report['relations'])} relations")
|
||||
|
||||
elif args.cmd == "search":
|
||||
res = search(args.args[0])
|
||||
res = dbsearch(args.args[0])
|
||||
for rec in res:
|
||||
print(rec)
|
||||
|
||||
|
|
@ -133,7 +134,7 @@ def main():
|
|||
elif args.cmd in ["list", "fraud", "delreport", "wipe", "submitfraud", "export"]:
|
||||
|
||||
# sanity check
|
||||
if args.cmd in ["submitfraud", "delreport", "wipe"] and not any_filter(args):
|
||||
if args.cmd in ["submitfraud", "fraud", "delreport", "wipe"] and not any_filter(args):
|
||||
print(f"Need company filter for {args.cmd}")
|
||||
sys.exit(1)
|
||||
|
||||
|
|
|
|||
|
|
@ -181,10 +181,15 @@ class Company:
|
|||
return len(self._reviews)
|
||||
|
||||
def count_rate(self):
|
||||
|
||||
|
||||
|
||||
self.ratings = list()
|
||||
for r in self._reviews:
|
||||
if r['rating'] is None:
|
||||
print_json(data=r)
|
||||
# 70000001006412601
|
||||
# rating could be None e.g. when provider=4sq
|
||||
continue
|
||||
if r['rating'] is not None:
|
||||
self.ratings.append(r['rating'])
|
||||
|
||||
|
|
@ -254,12 +259,16 @@ class Company:
|
|||
r.raise_for_status()
|
||||
data = r.json()
|
||||
|
||||
# branch review may exist, but not total_count
|
||||
# 70000001028529798
|
||||
|
||||
if self.total_count_2gis is None:
|
||||
self.total_count_2gis = data['meta']['total_count']
|
||||
self.branch_count_2gis = data['meta']['branch_reviews_count']
|
||||
self.branch_rating_2gis = data['meta']['branch_rating']
|
||||
# print(f"Total/Branch reviews count: {self.total_count_2gis}/{self.branch_count_2gis}")
|
||||
|
||||
|
||||
if self.total_count_2gis == 0 or self.branch_count_2gis == 0:
|
||||
raise AFNoCompany(f"No reviews for {self.object_id}")
|
||||
|
||||
|
|
@ -343,6 +352,7 @@ class Company:
|
|||
'rating_2gis': self.branch_rating_2gis,
|
||||
'trusted': self.trusted,
|
||||
'nreviews': self.nreviews(),
|
||||
'detections': ' '.join(self.detections)
|
||||
}
|
||||
|
||||
if self.trusted is None and self.report_path.exists():
|
||||
|
|
|
|||
86
src/antifraud2gis/companydb.py
Normal file
86
src/antifraud2gis/companydb.py
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
import sqlite3
|
||||
from .settings import settings
|
||||
from typing import Optional
|
||||
from rich import print_json
|
||||
|
||||
|
||||
"""
|
||||
CREATE TABLE company (
|
||||
oid TEXT PRIMARY KEY,
|
||||
title TEXT,
|
||||
address TEXT,
|
||||
town TEXT,
|
||||
searchstr TEXT,
|
||||
rating_2gis REAL,
|
||||
trusted BOOLEAN,
|
||||
nreviews INTEGER,
|
||||
detections TEXT
|
||||
);
|
||||
"""
|
||||
|
||||
def make_connection():
    """Open and return a new SQLite connection to ``settings.companydb``."""
    db_path = settings.companydb
    return sqlite3.connect(db_path)
|
||||
|
||||
# Function to check if oid exists in the "company" table
|
||||
def check_by_oid(oid: str, conn=None):
    """Return True when a row with the given ``oid`` exists in ``company``.

    Args:
        oid: Company object id to look up.
        conn: Optional open ``sqlite3`` connection. When omitted, a new
            connection is opened with ``make_connection()`` and closed
            again before returning.

    Returns:
        bool: True if the oid is present, False otherwise.
    """
    # Only a connection we opened ourselves is ours to close; a caller's
    # connection is left open for further use.
    owned = None if conn else make_connection()
    db = conn or owned
    try:
        cursor = db.cursor()
        try:
            cursor.execute("SELECT 1 FROM company WHERE oid = ?", (oid,))
            # fetchone() yields None when no row matched
            return cursor.fetchone() is not None
        finally:
            cursor.close()
    finally:
        if owned is not None:
            owned.close()
|
||||
|
||||
def get_by_oid(oid: str, conn=None) -> Optional[dict]:
    """Fetch one ``company`` row by ``oid`` as a column-name -> value dict.

    Args:
        oid: Company object id to look up.
        conn: Optional open ``sqlite3`` connection. When omitted, a new
            connection is opened with ``make_connection()`` and closed
            again before returning.

    Returns:
        A dict with one entry per table column, or None if no row matches.
    """
    # Only a connection we opened ourselves is ours to close.
    owned = None if conn else make_connection()
    db = conn or owned
    try:
        cursor = db.cursor()
        try:
            cursor.execute("SELECT * FROM company WHERE oid = ?", (oid,))
            row = cursor.fetchone()
            if row is None:
                return None

            # cursor.description holds one tuple per result column; item 0 is its name
            col_names = [desc[0] for desc in cursor.description]
            return dict(zip(col_names, row))
        finally:
            cursor.close()
    finally:
        if owned is not None:
            owned.close()
|
||||
|
||||
def dbsearch(query: str, limit=20, conn=None) -> list[dict]:
    """Search ``company`` for rows whose ``searchstr`` contains every query word.

    The query is lowercased and split on whitespace; each word must occur
    somewhere in ``searchstr`` (substring match, all words ANDed together).

    Args:
        query: Free-text search string.
        limit: Maximum number of rows to return.
        conn: Optional open ``sqlite3`` connection. When omitted, a new
            connection is opened with ``make_connection()`` and closed
            again before returning.

    Returns:
        A list of dicts (column name -> value), empty when the query
        contains no words.
    """
    words = query.strip().lower().split()
    if not words:
        # nothing to search for: return before opening any connection
        return []

    # The query is user input: escape LIKE metacharacters so that "%" and "_"
    # match literally, and bind LIMIT as a parameter instead of formatting it
    # into the SQL text.
    clauses = " AND ".join(["searchstr LIKE ? ESCAPE '\\'"] * len(words))
    params = [f"%{_escape_like(w)}%" for w in words]
    sql = f"SELECT * FROM company WHERE {clauses} LIMIT ?"

    # Only a connection we opened ourselves is ours to close.
    owned = None if conn else make_connection()
    db = conn or owned
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql, [*params, int(limit)])
            rows = cursor.fetchall()
            col_names = [desc[0] for desc in cursor.description]
            return [dict(zip(col_names, row)) for row in rows]
        finally:
            cursor.close()
    finally:
        if owned is not None:
            owned.close()


def _escape_like(word: str) -> str:
    """Backslash-escape the LIKE wildcards (and the escape char) in ``word``."""
    return word.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
|
||||
|
||||
|
||||
def add_company(company_data: dict, conn=None):
    """Insert or replace one row in ``company`` from a plain dict.

    Missing keys are stored as NULL. ``searchstr`` is lowercased before it is
    stored; ``dbsearch`` lowercases the query words it matches against it.

    Args:
        company_data: Mapping with any of the keys ``oid``, ``title``,
            ``address``, ``town``, ``searchstr``, ``rating_2gis``,
            ``trusted``, ``nreviews``, ``detections``.
        conn: Optional open ``sqlite3`` connection. When omitted, a new
            connection is opened with ``make_connection()`` and closed
            again after the commit.
    """
    # A missing/None searchstr must not crash the insert (None has no .lower()).
    searchstr = company_data.get("searchstr")
    if searchstr is not None:
        searchstr = searchstr.lower()

    # REPLACE overwrites an existing row with the same primary key (oid).
    sql = """
    REPLACE INTO company (oid, title, address, town, searchstr, rating_2gis, trusted, nreviews, detections)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    values = (
        company_data.get("oid"),
        company_data.get("title"),
        company_data.get("address"),
        company_data.get("town"),
        searchstr,
        company_data.get("rating_2gis"),
        company_data.get("trusted"),
        company_data.get("nreviews"),
        company_data.get("detections"),
    )

    # Only a connection we opened ourselves is ours to close.
    owned = None if conn else make_connection()
    db = conn or owned
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql, values)
        finally:
            cursor.close()
        db.commit()
    finally:
        if owned is not None:
            owned.close()
|
||||
|
|
@ -16,12 +16,14 @@ from .db import db
|
|||
from .const import WSCORE_THRESHOLD, WSCORE_HITS_THRESHOLD, MAX_USER_REVIEWS
|
||||
from .logger import logger
|
||||
from .company import Company, CompanyList
|
||||
from .companydb import add_company, get_by_oid, check_by_oid
|
||||
from .user import User, get_user
|
||||
from .relation import RelationDict, _is_dangerous
|
||||
from .settings import settings
|
||||
from .exceptions import AFReportNotReady, AFNoCompany, AFReportAlreadyExists
|
||||
# from .usernotes import Usernotes
|
||||
from .fd.master import MasterFD
|
||||
from .search import company_indexed, index_company
|
||||
|
||||
def detect(c: Company, cl: CompanyList, explain: bool = False, force=False):
|
||||
|
||||
|
|
@ -63,6 +65,12 @@ def detect(c: Company, cl: CompanyList, explain: bool = False, force=False):
|
|||
report = dict()
|
||||
report['score'] = score
|
||||
report['relations'] = list()
|
||||
|
||||
c.trusted = True
|
||||
c.detections = list()
|
||||
|
||||
add_company(c.export())
|
||||
|
||||
with gzip.open(c.report_path, "wt") as fh:
|
||||
json.dump(report, fh)
|
||||
|
||||
|
|
@ -111,6 +119,8 @@ def detect(c: Company, cl: CompanyList, explain: bool = False, force=False):
|
|||
dnames = [ dline.split(' ')[0] for dline in score['detections'] ]
|
||||
trust_line = f"RISK {len(dnames)} {'+'.join(dnames)}"
|
||||
|
||||
add_company(c.export())
|
||||
|
||||
logger.info(f"DETECTION RESULT {c} {trust_line}")
|
||||
return score
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
import subprocess
|
||||
|
||||
from .settings import settings
|
||||
from .company import Company
|
||||
import json
|
||||
import fcntl
|
||||
|
||||
def is_safe_search(query):
|
||||
# Проверяем, что строка состоит из букв, цифр и пробелов
|
||||
|
|
@ -28,3 +30,31 @@ def search(query: str, limit=50):
|
|||
except json.JSONDecodeError:
|
||||
print("Error decoding JSON:", r.stdout)
|
||||
return list()
|
||||
|
||||
def index_company(c: Company):
    """Append a company to the ``settings.searchnew`` JSONL index if not yet indexed.

    NOTE: the append step is currently disabled by the temporary early
    ``return`` below, so this function only reports already-indexed companies.
    """
    already_there = company_indexed(c.object_id)
    if already_there:
        print(f"{c.object_id} already indexed")
        return

    # tmp: indexing is switched off; the code below is unreachable for now
    return

    with open(settings.searchnew, "a", encoding="utf-8") as fh:
        # exclusive lock around the append, released right after the write
        fcntl.flock(fh, fcntl.LOCK_EX)
        record = json.dumps(c.export())
        fh.write(record + "\n")
        fcntl.flock(fh, fcntl.LOCK_UN)
|
||||
|
||||
def company_indexed(oid: str, path = None):
    """Tell whether a company with this ``oid`` is present in a JSONL index.

    Args:
        oid: Company object id to look for.
        path: Index file to scan with ``jq``. When None, both
            ``settings.search`` and ``settings.searchnew`` are checked.

    Returns:
        bool: True if a record with a matching ``.oid`` was found.
    """
    if path is None:
        for index in [settings.search, settings.searchnew]:
            if company_indexed(oid, index):
                print(f"{oid} already indexed in {index}")
                return True
        return False

    if not path.exists():
        return False

    # Pass the oid through --arg so it reaches jq as a string value ($oid)
    # instead of being spliced into the filter program text.
    r = subprocess.run(
        ['jq', '-c', '--arg', 'oid', oid, '. | select(.oid == $oid)', path],
        capture_output=True, text=True)
    print(f"indexed? {path} {r.returncode} {len(r.stdout)}")

    # jq prints one line per matching record: indexed means a clean exit with
    # non-empty output. (Previously this branch returned None, so the
    # function could never report True.)
    return r.returncode == 0 and bool(r.stdout.strip())
|
||||
|
|
|
|||
|
|
@ -9,7 +9,9 @@ class Settings():
|
|||
self.user_storage = self.storage / "users"
|
||||
self.private_user_storage = self.storage / "users" / "_private.json"
|
||||
self.company_storage = self.storage / "companies"
|
||||
self.search = self.storage / "search.jsonl"
|
||||
# self.search = self.storage / "search.jsonl"
|
||||
# self.searchnew = self.storage / "searchnew.jsonl"
|
||||
self.companydb = self.storage / "companies.db"
|
||||
|
||||
# trust company if <= min_reviews
|
||||
self.min_reviews = int(os.getenv('MIN_REVIEWS', '20'))
|
||||
|
|
|
|||
|
|
@ -27,8 +27,17 @@ function turnstileCallback(){
|
|||
submit_btn.disabled = false;
|
||||
}
|
||||
|
||||
function make_toggle_link(){
    // Wire the "toggle-link" anchor, when it exists on the page, so that a
    // click toggles the "show" class on the "recalc-box" element.
    const link = document.getElementById("toggle-link");
    if (!link) {
        return;
    }

    link.addEventListener("click", (e) => {
        e.preventDefault();
        const box = document.getElementById("recalc-box");
        box.classList.toggle("show");
    });
}
|
||||
|
||||
function main(){
|
||||
make_auto_refresh()
|
||||
make_auto_refresh();
|
||||
make_toggle_link();
|
||||
}
|
||||
|
||||
main()
|
||||
|
|
@ -386,4 +386,17 @@ li {
|
|||
.cf-turnstile {
|
||||
width: 300px;
|
||||
height: 65px;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Recalculation form container: hidden until the "show" class is toggled on. */
#recalc-box {
    display: none;
    max-height: 0;
    overflow: hidden;
    transition: max-height 0.5s ease;
}

/* Expanded state. */
#recalc-box.show {
    display: flex;
    max-height: 200px; /* or auto for dynamic sizing */
}
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ from .logger import logger
|
|||
from .const import REDIS_WORKER_STATUS, REDIS_TRUSTED_LIST, REDIS_UNTRUSTED_LIST, REDIS_TASK_QUEUE_NAME, REDIS_DRAMATIQ_QUEUE
|
||||
from .user import reset_user_pool
|
||||
from .statistics import statistics
|
||||
from .search import index_company
|
||||
|
||||
broker = dramatiq.get_broker()
|
||||
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@
|
|||
<h1>{% block header %}{% endblock %}</h1>
|
||||
</div>
|
||||
<div class="search-container">
|
||||
<form class="search-form" id="search-form" method="POST" action="/search">
|
||||
<form class="search-form" id="search-form" method="GET" action="/search">
|
||||
<input type="text" id="oid" name="query" value="{{query}}" placeholder="Введите название компании или 2GIS object_id" required>
|
||||
<button type="submit">Найти</button>
|
||||
</form>
|
||||
|
|
|
|||
|
|
@ -114,10 +114,10 @@
|
|||
</div>
|
||||
|
||||
<div id="actionbox">
|
||||
<p>Можно пересчитать (на случай если в алгоритме поменяли параметры со времени расчета).</p>
|
||||
<p>Можно <a href="javascript:void(0)" class="toggle" id="toggle-link">пересчитать</a> (на случай если в алгоритме поменяли параметры со времени расчета).</p>
|
||||
|
||||
<form method="POST" action="/submit">
|
||||
<div class="flexdiv">
|
||||
<div class="flexdiv" id="recalc-box">
|
||||
{%if settings.turnstile_sitekey %}
|
||||
{% set btn_disabled = "disabled" %}
|
||||
|
||||
|
|
@ -129,11 +129,11 @@
|
|||
<div>
|
||||
<input type="hidden" name="force" value="true">
|
||||
<input type="hidden" name="oid" value="{{oid}}">
|
||||
<button id="submit_btn" {{btn_disabled}}>Пересчитать</button>
|
||||
<button type="submit" id="submit_btn" {{btn_disabled}}>Пересчитать</button>
|
||||
</div>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div> <!-- actionbox -->
|
||||
|
||||
</div>
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue