-
Notifications
You must be signed in to change notification settings - Fork 42
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
POC: Deep Search PDF to MD file conversion
Signed-off-by: Brent Salisbury <[email protected]>
- Loading branch information
Showing
15 changed files
with
638 additions
and
99 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,6 @@ npm-debug.log | |
.env | ||
*.env | ||
coverage | ||
lib | ||
taxonomy | ||
config.yaml | ||
generated | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
'use server'; | ||
|
||
import { NextResponse, NextRequest } from 'next/server'; | ||
import fetch from 'node-fetch'; | ||
|
||
/**
 * Route handler that converts a PDF hosted in a GitHub repository into
 * Markdown/JSON through the Deep Search conversion API.
 *
 * Expected JSON body: `{ repoUrl: string, documentNames: string[] }`. The first
 * entry of `documentNames` ending in `.pdf` (case-insensitive) is converted;
 * it is fetched by the remote service from the repository's `main` branch via
 * raw.githubusercontent.com.
 *
 * Flow: authenticate with Basic credentials to obtain a token, start a
 * conversion task, poll the task until it reaches a terminal state, then
 * return `{ json_file_url, md_file_url, document_hash }`.
 *
 * Responses:
 * - 200 with the result object on success.
 * - 400 when the body is not valid JSON, has the wrong shape, names no PDF, or
 *   the repository URL has no owner/name.
 * - 500 when required environment variables are missing, the upstream response
 *   is unusable, or the conversion task reports FAILURE.
 * - 504 when the task does not finish within the polling budget.
 * - The upstream status code when one of the Deep Search calls is not OK.
 */
export async function POST(req: NextRequest) {
  // Polling budget: bounds the request so a stuck task cannot hold it open forever.
  const MAX_POLL_ATTEMPTS = 60;
  const POLL_INTERVAL_MS = 10000;

  type ConvertTask = {
    task_status?: string;
    result?: { json_file_url?: string; md_file_url?: string; document_hash?: string } | null;
  };

  let payload: unknown;
  try {
    payload = await req.json();
  } catch {
    console.error('Invalid JSON in request body');
    return NextResponse.json({ error: 'Invalid JSON in request body' }, { status: 400 });
  }
  const { repoUrl, documentNames } = (payload ?? {}) as { repoUrl?: unknown; documentNames?: unknown };

  const USERNAME = process.env.DS_USERNAME;
  const API_KEY = process.env.DS_API_KEY;
  const HOST = process.env.DS_HOST;
  const PROJ_KEY = process.env.DS_PROJ_KEY;
  const BRANCH = 'main';

  if (!USERNAME || !API_KEY || !HOST || !PROJ_KEY) {
    console.error('Missing environment variables');
    return NextResponse.json({ error: 'Missing environment variables' }, { status: 500 });
  }

  if (typeof repoUrl !== 'string' || !Array.isArray(documentNames)) {
    console.error('Invalid request body: repoUrl must be a string and documentNames must be an array');
    return NextResponse.json({ error: 'repoUrl must be a string and documentNames must be an array' }, { status: 400 });
  }

  const pdfFileName = documentNames.find((name): name is string => typeof name === 'string' && name.toLowerCase().endsWith('.pdf'));
  if (!pdfFileName) {
    console.error('No PDF file found for conversion');
    return NextResponse.json({ error: 'No PDF file found for conversion' }, { status: 400 });
  }

  // Accept both "https://github.com/owner/repo" and "owner/repo", tolerating a
  // trailing slash or ".git" suffix.
  const [repoOwner, rawRepoName] = repoUrl
    .trim()
    .replace('https://github.com/', '')
    .split('/')
    .filter((segment) => segment.length > 0);
  const repoName = rawRepoName?.replace(/\.git$/, '');
  if (!repoOwner || !repoName) {
    console.error('Invalid repository URL:', repoUrl);
    return NextResponse.json({ error: 'Invalid repository URL' }, { status: 400 });
  }

  // Encode every path segment so spaces or reserved characters in user-supplied
  // names cannot produce a malformed or redirected URL.
  const encodedPath = pdfFileName
    .split('/')
    .filter((segment) => segment.length > 0)
    .map((segment) => encodeURIComponent(segment))
    .join('/');
  const PDF_URL = `https://raw.githubusercontent.com/${encodeURIComponent(repoOwner)}/${encodeURIComponent(repoName)}/${BRANCH}/${encodedPath}`;
  console.log(`PDF URL for conversion: ${PDF_URL}`);

  try {
    console.log('Starting authentication...');
    const authResponse = await fetch(`${HOST}/api/cps/user/v1/user/token`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Basic ${Buffer.from(`${USERNAME}:${API_KEY}`).toString('base64')}`
      },
      body: JSON.stringify({})
    });

    if (!authResponse.ok) {
      const error = await authResponse.text();
      console.error('Error during authentication:', error);
      return NextResponse.json({ error }, { status: authResponse.status });
    }

    const authData = (await authResponse.json()) as { access_token?: unknown } | null;
    const token = authData?.access_token;
    if (typeof token !== 'string' || token === '') {
      console.error('Authentication response did not contain an access token');
      return NextResponse.json({ error: 'Authentication response did not contain an access token' }, { status: 500 });
    }
    console.log('Authentication successful. Token obtained.');

    console.log('Starting PDF conversion...');
    const convertResponse = await fetch(`${HOST}/api/cps/public/v2/project/${PROJ_KEY}/convert`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: token
      },
      body: JSON.stringify({
        http_source: { url: PDF_URL, headers: {} }
      })
    });

    if (!convertResponse.ok) {
      const error = await convertResponse.text();
      console.error('Error during PDF conversion:', error);
      return NextResponse.json({ error }, { status: convertResponse.status });
    }

    const convertData = (await convertResponse.json()) as { task_id?: unknown } | null;
    const taskId = convertData?.task_id;
    if (typeof taskId !== 'string' || taskId === '') {
      console.error('Conversion response did not contain a task id');
      return NextResponse.json({ error: 'Conversion response did not contain a task id' }, { status: 500 });
    }
    console.log(`PDF conversion started. Task ID: ${taskId}`);

    console.log('Checking conversion task status...');
    let taskStatus: ConvertTask | undefined;
    let finished = false;
    for (let attempt = 1; attempt <= MAX_POLL_ATTEMPTS; attempt++) {
      const taskResponse = await fetch(`${HOST}/api/cps/public/v2/project/${PROJ_KEY}/convert_tasks/${encodeURIComponent(taskId)}?wait=10`, {
        method: 'GET',
        headers: {
          Authorization: token
        }
      });

      if (!taskResponse.ok) {
        const error = await taskResponse.text();
        console.error('Error during task status check:', error);
        return NextResponse.json({ error }, { status: taskResponse.status });
      }

      const taskText = await taskResponse.text();
      let parsed: unknown;
      try {
        parsed = JSON.parse(taskText);
      } catch (parseError) {
        console.error('Error parsing task status response:', taskText);
        return NextResponse.json({ error: 'Failed to parse task status response' }, { status: 500 });
      }
      if (typeof parsed !== 'object' || parsed === null) {
        console.error('Error parsing task status response:', taskText);
        return NextResponse.json({ error: 'Failed to parse task status response' }, { status: 500 });
      }
      taskStatus = parsed as ConvertTask;

      console.log(`Task status: ${taskStatus.task_status}`);

      // FAILURE is terminal even without a result payload; SUCCESS is only
      // usable once the result (file URLs) is attached.
      const failed = taskStatus.task_status === 'FAILURE';
      const succeeded = taskStatus.task_status === 'SUCCESS' && Boolean(taskStatus.result);
      if (failed || succeeded) {
        finished = true;
        break;
      }

      if (attempt < MAX_POLL_ATTEMPTS) {
        await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS)); // Wait before polling again
      }
    }

    if (!finished || !taskStatus) {
      console.error('Timed out waiting for PDF conversion task.');
      return NextResponse.json({ error: 'Timed out waiting for PDF conversion task' }, { status: 504 });
    }

    if (taskStatus.task_status === 'FAILURE' || !taskStatus.result) {
      console.error('PDF Conversion Task failed.');
      return NextResponse.json({ error: 'PDF Conversion Task failed' }, { status: 500 });
    }

    const result = {
      json_file_url: taskStatus.result.json_file_url,
      md_file_url: taskStatus.result.md_file_url,
      document_hash: taskStatus.result.document_hash
    };

    console.log('Task completed successfully.');
    console.log(`JSON file URL: ${result.json_file_url}`);
    console.log(`Markdown file URL: ${result.md_file_url}`);
    console.log(`Document hash: ${result.document_hash}`);

    return NextResponse.json(result);
  } catch (error: unknown) {
    console.error('Unexpected error:', error);
    // Thrown values are not guaranteed to be Error instances.
    const message = error instanceof Error ? error.message : String(error);
    return NextResponse.json({ error: message }, { status: 500 });
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
100 changes: 100 additions & 0 deletions
100
src/components/Contribute/Knowledge/FileSelectionModal.tsx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
// src/components/Contribute/Knowledge/FileSelectionModal.tsx | ||
import React, { useEffect, useState } from 'react'; | ||
import { Modal, Button, DataList, DataListItem, DataListItemRow, DataListCell, DataListCheck, Spinner, Alert } from '@patternfly/react-core'; | ||
import { fetchGitHubRepoFiles } from '@/utils/fileManagerGithub'; | ||
import { useSession } from 'next-auth/react'; | ||
import { getGitHubUsername } from '@/utils/github'; | ||
|
||
// Props accepted by the FileSelectionModal component. | ||
interface FileSelectionModalProps { | ||
// Forwarded to the Modal's isOpen prop. | ||
isOpen: boolean; | ||
// Invoked by the Modal's close control, the Cancel button, and after a confirmed selection. | ||
onClose: () => void; | ||
// Receives the paths of the checked files when the user presses Confirm. | ||
onSelectFiles: (files: string[]) => void; | ||
// NOTE(review): the component shown in this file does not use this value - confirm whether it is still needed. | ||
repoName: string; | ||
} | ||
|
||
/**
 * Modal that lists the files returned by `fetchGitHubRepoFiles` for the
 * signed-in user and lets them tick the ones to use.
 *
 * - Files are loaded once the NextAuth session has resolved; while the session
 *   is still loading the spinner stays visible instead of reporting an error.
 * - Pressing Confirm passes the selected paths to `onSelectFiles` and then
 *   calls `onClose`; Cancel only calls `onClose`.
 *
 * NOTE(review): `repoName` and the stored GitHub username are not used by the
 * rendering below - confirm whether they are still needed.
 */
export const FileSelectionModal: React.FC<FileSelectionModalProps> = ({ isOpen, onClose, onSelectFiles, repoName }) => {
  // Only `path` is read from each entry; presumably fetchGitHubRepoFiles returns
  // richer objects - verify against its declaration.
  type RepoFile = { path: string };

  const { data: session, status } = useSession(); // Session data and resolution status from NextAuth
  const [files, setFiles] = useState<RepoFile[]>([]); // Files listed from the repository
  const [loading, setLoading] = useState<boolean>(true); // True until the first load attempt settles
  const [error, setError] = useState<string | null>(null);
  const [selectedFiles, setSelectedFiles] = useState<string[]>([]); // Paths of the checked files
  const [githubUsername, setGithubUsername] = useState<string | null>(null);

  useEffect(() => {
    // The session is undefined while NextAuth is still resolving it; reporting
    // "Unauthorized" at that point would leave a stale error on screen.
    if (status === 'loading') {
      return;
    }

    // Guards against state updates after unmount or after the session changed
    // while a request was still in flight.
    let cancelled = false;

    const loadFiles = async () => {
      if (!session || !session.accessToken) {
        setError('Unauthorized: Missing or invalid access token');
        setLoading(false);
        return;
      }

      // Clear any error left over from a previous attempt so the list can render.
      setError(null);
      setLoading(true);

      try {
        const username = await getGitHubUsername(session.accessToken as string);
        const repoFiles = await fetchGitHubRepoFiles(session.accessToken as string);
        if (!cancelled) {
          setGithubUsername(username);
          setFiles(repoFiles);
        }
      } catch (err) {
        console.error(err);
        if (!cancelled) {
          setError('Failed to load files');
        }
      } finally {
        if (!cancelled) {
          setLoading(false);
        }
      }
    };

    void loadFiles();

    return () => {
      cancelled = true;
    };
  }, [session, status]);

  // The checkbox is controlled by `selectedFiles`, so a change event always
  // means "flip this path". Toggling from state avoids depending on the
  // argument order of the onChange callback.
  const toggleFileSelection = (filePath: string) => {
    setSelectedFiles((prevSelectedFiles) =>
      prevSelectedFiles.includes(filePath) ? prevSelectedFiles.filter((file) => file !== filePath) : [...prevSelectedFiles, filePath]
    );
  };

  // Hand the selected paths to the parent component, then close the modal.
  const handleConfirmSelection = () => {
    onSelectFiles(selectedFiles);
    onClose();
  };

  return (
    <Modal
      title="Select PDF or Markdown files from your knowledge files repository on GitHub"
      isOpen={isOpen}
      onClose={onClose}
      actions={[
        <Button key="confirm" variant="primary" onClick={handleConfirmSelection}>
          Confirm
        </Button>,
        <Button key="cancel" variant="link" onClick={onClose}>
          Cancel
        </Button>
      ]}
    >
      {loading && <Spinner size="lg" />}
      {error && (
        <Alert variant="danger" title="Error loading files">
          {error}
        </Alert>
      )}
      {!loading && !error && (
        <DataList aria-label="File List">
          {files.map((file, index) => (
            // Key by path so row identity survives reordering of the list.
            <DataListItem key={file.path} aria-labelledby={`file-item-${index}`}>
              <DataListItemRow>
                <DataListCheck
                  aria-labelledby={`file-item-${index}`}
                  checked={selectedFiles.includes(file.path)}
                  onChange={() => toggleFileSelection(file.path)}
                />
                <DataListCell>
                  <span id={`file-item-${index}`}>{file.path}</span>
                </DataListCell>
              </DataListItemRow>
            </DataListItem>
          ))}
        </DataList>
      )}
    </Modal>
  );
};
|
||
export default FileSelectionModal; |
Oops, something went wrong.