Skip to content

Commit

Permalink
feat(app):
Browse files Browse the repository at this point in the history
- update extension key
- fix sso bug
- add async cos
- change prompt multilanguage
  • Loading branch information
MorvanZhou committed Aug 21, 2024
1 parent 2186a3f commit add140a
Show file tree
Hide file tree
Showing 21 changed files with 1,953 additions and 71 deletions.
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ retk =
models/search_engine/*.txt
plugins/official_plugins/**/*
core/ai/llm/knowledge/*.md
markdown.css

[options.extras_require]
build =
Expand Down
2 changes: 1 addition & 1 deletion src/retk/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ async def lifespan(app: FastAPI):
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
expose_headers=["X-Captcha-Token"],
expose_headers=["X-Captcha-Token", "Content-Disposition"],
)
app.add_middleware(safety.CSPMiddleware)
app.add_middleware(safety.FrameOptionsMiddleware)
Expand Down
1 change: 1 addition & 0 deletions src/retk/const/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
RETHINK_DIR = Path(__file__).parent.parent
DOT_DATA = ".data"
FRONTEND_DIR = RETHINK_DIR / "dist-local"
LOCAL_FILE_URL_PRE_DIR = "files"
MD_MAX_LENGTH = 100_000
REQUEST_ID_MAX_LENGTH = 50
UID_MAX_LENGTH = 30
Expand Down
58 changes: 56 additions & 2 deletions src/retk/controllers/node/node_ops.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from typing import List
from typing import List, Literal

from fastapi.responses import StreamingResponse

from retk import const, core
from retk.controllers import schemas
from retk.controllers.utils import maybe_raise_json_exception
from retk.controllers.utils import maybe_raise_json_exception, json_exception
from retk.models.tps import AuthedUser, Node
from retk.utils import contain_only_http_link, get_title_description_from_link, datetime2str

Expand Down Expand Up @@ -223,3 +225,55 @@ async def get_favorite_nodes(
requestId=au.request_id,
data=_get_node_search_response_data(nodes=nodes, total=total),
)


async def stream_md_export(
        au: AuthedUser,
        nid: str,
        format_: Literal["md", "html", "pdf"],
) -> StreamingResponse:
    """Export a node and stream the resulting file back as an attachment.

    Args:
        au: the authenticated user making the request.
        nid: id of the node to export.
        format_: requested export format.

    Returns:
        A ``StreamingResponse`` carrying the exported file, with a
        ``Content-Disposition`` attachment header and the ``Request-Id``.

    Raises:
        Whatever ``maybe_raise_json_exception`` raises for a non-OK ``code``,
        and the ``json_exception`` built with ``OPERATION_FAILED`` when the
        export reports a media type that has no known file extension.
    """
    media_type, title, file, code = await core.node.md_export(
        au=au,
        nid=nid,
        format_=format_,
    )
    # Expected to raise when the export did not succeed, so that `file`
    # is usable below.
    maybe_raise_json_exception(au=au, code=code)

    # Media type reported by the export -> extension of the download name.
    extension_by_media_type = {
        "application/zip": "zip",
        "text/markdown": "md",
        "text/html": "html",
        "application/pdf": "pdf",
    }
    extension = extension_by_media_type.get(media_type)
    if extension is None:
        # Nothing will be streamed, so release the buffer before failing.
        if file is not None:
            file.close()
        raise json_exception(
            request_id=au.request_id,
            uid=au.u.id,
            code=const.CodeEnum.OPERATION_FAILED,
            language=au.language,
        )

    headers = {
        "Content-Disposition": f"attachment; filename={title}.{extension}",
        "Request-Id": au.request_id,
    }
    chunk_size = 1024

    async def iter_file():
        """Yield the file in fixed-size chunks, then close it."""
        try:
            while True:
                chunk = file.read(chunk_size)
                if not chunk:
                    break
                yield chunk
        finally:
            # Runs on normal exhaustion and when the generator is closed
            # early (e.g. the response is aborted), so the buffer is
            # always released.
            file.close()

    return StreamingResponse(
        content=iter_file(),
        media_type=media_type,
        headers=headers,
    )
37 changes: 36 additions & 1 deletion src/retk/core/ai/llm/knowledge/system_extend.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,11 @@

接下来,我将展示我最近接触到的一条信息,
请依据你的内在丰富的知识网络,帮我推荐出一条我会感兴趣的 **新知识**
请注意,你返回的结果必须为下面案例展示的 JSON 格式。
请注意:

1. 你返回的结果必须为下面案例展示的 JSON 格式。
2. 在每次生成 JSON 结果的时候,生成的 value 请遵循我的主语言,比如我展示的信息使用 English,那么就生成 English
内容,若主要使用中文,那么就生成中文内容。

# 案例 1:

Expand Down Expand Up @@ -68,3 +72,34 @@
```

"""

# 案例 3:

## 我展示的信息

"""
Title: Dual Process Theory in "Thinking, Fast and Slow"

Key Points:

1. The book is a popular science work by psychologist Daniel Kahneman.
2. Published in 2011.
3. Main thesis differentiates between two modes of thought.
4. "System 1" is characterized as fast, instinctive, and emotional.
5. "System 2" is described as slower, more deliberative, and more logical.

"""

## 你返回的结果

"""

```json
{
"title": "The Influence of Cognitive Biases on Decision Making",
"content": "- Cognitive biases are systematic errors in thinking that affect the decisions and judgments that people make.\n- Some of these biases are related to memory. The way you remember an event may be biased for a number of reasons and that in turn can lead to biased thinking and decision-making.\n- Other cognitive biases might be related to problems with attention. Since attention is a limited resource, people have to be selective about what they pay attention to in the world around them.\n- Because of these biases, people often create their own 'subjective social reality' that may not align with the objective world.\n- Understanding these biases can help improve decision making skills and lead to better outcomes in life.",
"searchTerms": "Cognitive biases, Decision making, Subjective social reality"
}
```

"""
32 changes: 31 additions & 1 deletion src/retk/core/ai/llm/knowledge/system_summary.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
你是一个博学多才的人,拥有非常丰富的知识,十分善于用简练的语言总结复杂的概念。

接下来,我将展示我最近接触到的一些知识或信息,请帮我提炼总结这段认知的关键信息,总结一个简短标题,并简短罗列出知识点来。
接下来,我将展示我最近接触到的一些知识或信息,请帮我:

1. 提炼总结这段认知的关键信息,总结一个简短标题,并简短罗列出知识点来。
2. 请遵循我的主语言,比如我展示的信息是用 English,那么就生成 English 总结,若主语言为中文,那么就生成中文总结。

案例 1:

Expand Down Expand Up @@ -51,3 +54,30 @@
5. 生命过程中水的作用:输送养分和排除废物

"""

案例 3:

# 我展示的信息:

"""
Thinking, Fast and Slow

Thinking, Fast and Slow is a 2011 popular science book by psychologist Daniel Kahneman. The book's main thesis is a
differentiation between two modes of thought: "System 1" is fast, instinctive and emotional; "System 2" is slower, more
deliberative, and more logical.
"""

# 你要返回的总结格式:

"""
Title: Dual Process Theory in "Thinking, Fast and Slow"

Key Points:

1. The book is a popular science work by psychologist Daniel Kahneman.
2. Published in 2011.
3. Main thesis differentiates between two modes of thought.
4. "System 1" is characterized as fast, instinctive, and emotional.
5. "System 2" is described as slower, more deliberative, and more logical.

"""
62 changes: 13 additions & 49 deletions src/retk/core/files/saver.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,11 @@
from PIL import Image, UnidentifiedImageError
from bson import ObjectId

try:
from qcloud_cos import CosConfig, CosServiceError, CosS3Client
except ImportError:
pass

from retk.config import get_settings, is_local_db
from retk.const.app import FileTypesEnum
from retk.const.settings import IMG_RESIZE_THRESHOLD, DOT_DATA
from retk.const.settings import IMG_RESIZE_THRESHOLD, DOT_DATA, LOCAL_FILE_URL_PRE_DIR
from retk.core.user import update_used_space
from retk.core.utils.cos import cos_client
from retk.logger import logger
from retk.models.client import client
from retk.models.tps import UserFile
Expand Down Expand Up @@ -96,7 +92,7 @@ async def save_local(self, uid: str, file: File) -> str:

if path.exists():
# skip the same image
return f"/files/{file.hashed_filename}"
return f"/{LOCAL_FILE_URL_PRE_DIR}/{file.hashed_filename}"

try:
if file.type == FileTypesEnum.IMAGE:
Expand All @@ -116,62 +112,30 @@ async def save_local(self, uid: str, file: File) -> str:
return ""

await add_to_db(uid=uid, file=file)
return f"/files/{file.hashed_filename}"
return f"/{LOCAL_FILE_URL_PRE_DIR}/{file.hashed_filename}"

async def save_remote(self, uid: str, file: File):
# to cos
token = None

settings = get_settings()
secret_id = settings.COS_SECRET_ID
secret_key = settings.COS_SECRET_KEY
region = settings.COS_REGION
domain = settings.COS_DOMAIN
cos_conf = CosConfig(
Region=region,
SecretId=secret_id,
SecretKey=secret_key,
Token=token,
Domain=domain,
Scheme='https',
)
cos_client = CosS3Client(cos_conf)

key = f"userData/{uid}/{file.hashed_filename}"

domain = settings.COS_DOMAIN or f"{settings.COS_BUCKET_NAME}.cos.{region}.myqcloud.com"
url = f"https://{domain}/{key}"
key = cos_client.get_user_data_key(uid=uid, filename=file.hashed_filename)
url = f"https://{cos_client.domain}/{key}"

doc = await client.coll.user_file.find_one({"uid": uid, "fid": file.hashed_filename})
if doc:
return url

try:
_ = cos_client.head_object(
Bucket=settings.COS_BUCKET_NAME,
Key=key
)
if await cos_client.async_has_file(uid=uid, filename=file.hashed_filename):
return url
except CosServiceError as e:
if e.get_status_code() != 404:
return url

if file.type == FileTypesEnum.IMAGE:
file.image_resize(resize_threshold=self.resize_threshold)

# can raise error
try:
_ = cos_client.put_object(
Bucket=settings.COS_BUCKET_NAME,
Body=file.data,
Key=key,
StorageClass='STANDARD', # 'STANDARD'|'STANDARD_IA'|'ARCHIVE',
EnableMD5=False,
# ContentType=content_type,
)
except CosServiceError as e:
logger.error(f"failed to save file to cos: {e}")
if not await cos_client.async_put(
file=file.data,
uid=uid,
filename=file.hashed_filename,
):
return ""

await add_to_db(uid=uid, file=file)
return url

Expand Down
24 changes: 23 additions & 1 deletion src/retk/core/node/node.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import copy
import datetime
from typing import List, Optional, Tuple, Dict, Any
import urllib.parse
from io import BytesIO
from typing import List, Optional, Tuple, Dict, Any, Literal

from bson import ObjectId
from bson.tz_util import utc

from retk import config, const, utils, regex
from retk import plugins
from retk.core import user, ai
from retk.core.utils import md_tools
from retk.logger import logger
from retk.models import tps, db_ops
from retk.models.client import client
Expand Down Expand Up @@ -446,3 +449,22 @@ async def get_hist_edition_md(au: tps.AuthedUser, nid: str, version: str) -> Tup
if version not in n["history"]:
return "", const.CodeEnum.NODE_NOT_EXIST
return backup.get_md(uid=au.u.id, nid=nid, version=version)


async def md_export(
        au: tps.AuthedUser,
        nid: str,
        format_: Literal["md", "html", "pdf"],
) -> Tuple[str, str, Optional[BytesIO], const.CodeEnum]:
    """Export one node's markdown in the requested format.

    Args:
        au: the authenticated user requesting the export.
        nid: id of the node to export.
        format_: target format passed through to ``md_tools.md_export``.

    Returns:
        ``(media_type, title, file, code)``. When the node lookup does not
        return ``CodeEnum.OK`` the result is ``("", "", None, code)``.
        Otherwise ``title`` is the node title percent-encoded with
        ``urllib.parse.quote`` and ``code`` is ``CodeEnum.OK``.
    """
    node, status = await get(au=au, nid=nid)
    if status != const.CodeEnum.OK:
        # Lookup failed: hand the lookup's code back with empty placeholders.
        return "", "", None, status

    exported = await md_tools.md_export(
        uid=au.u.id,
        title=node["title"],
        md=node["md"],
        format_=format_,
    )
    media_type, file = exported
    # The raw title goes to the exporter; the returned one is percent-encoded.
    quoted_title = urllib.parse.quote(node["title"])
    return media_type, quoted_title, file, const.CodeEnum.OK
2 changes: 1 addition & 1 deletion src/retk/core/notice.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ async def post_in_manager_delivery(
senderType=au.u.type,
senderId=au.u.id,
title=title,
html=md2html(content),
html=md2html(content, with_css=True),
snippet=md2txt(content)[:20],
recipientType=recipient_type, # send to which user type, 0: all, 1: batch, 2: admin, 3: manager
batchTypeIds=batch_type_ids, # if recipient=batch, put user id here
Expand Down
2 changes: 1 addition & 1 deletion src/retk/core/self_hosted.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ async def notice_new_pkg_version():
__new_version_content_temp_zh if language == "zh" else __new_version_content_temp_en
).format(local_version_str, remote_version_str)
for notice in res:
if notice["title"] == title and notice["html"] == md2html(content):
if notice["title"] == title and notice["html"] == md2html(content, with_css=True):
return
await post_in_manager_delivery(
au=_local_system_authed_user,
Expand Down
Loading

0 comments on commit add140a

Please sign in to comment.