/**
 * LLM Code Documentation
 *
 * This script automates the generation of AI-powered documentation for Docker services. Key features include:
 * - Uses the SearxNG API to gather context about services from multiple sources
 * - Scrapes project websites and GitHub READMEs for additional context
 * - Processes content through Ollama LLM models (Qwen 2.5 and Llama 3.1)
 * - Generates structured markdown documentation with sections for:
 *   - Introduction
 *   - Uses and Benefits
 *   - Docker Setup
 *   - Security Essentials
 * - Handles frontmatter management including metadata, tags, and performance metrics
 * - Attempts, so far unsuccessfully, to maintain consistent formatting and style across all generated content
 */
import fs from 'fs';
import matter from 'gray-matter';
import axios from 'axios';
import * as cheerio from 'cheerio';
/** Base SearxNG search URL; the search query is appended directly after `q=`. */
const SEARXNG_BASE_QUERY = 'http://localhost:8080/search?&lang=en&safesearch=2&format=json&q=';

/** Directory whose files are read as the input service definitions. */
const INPUT_DIRECTORY = '../src/content/services';

/** Root output directory; one sub-directory per model is created beneath it. */
const OUTPUT_DIRECTORY = '../src/content/llm';

/** Bang prefix placed in front of every SearxNG query (presumably selects the IT category — confirm). */
const searchCategoryShebang = '!it';

/** Ollama model tags; the whole input directory is processed once per entry. */
const OLLAMA_MODELS = [
    'qwen2.5:latest',
    'llama3.1:latest',
];

/**
 * Generation settings handed to the guide generator for every request.
 * NOTE(review): `max_tokens` is an OpenAI-style name; Ollama's own option is
 * `num_predict` — confirm how the request builder forwards it.
 */
const OPTIONS = {
    temperature: 0.5,
    seed: 1,
    max_tokens: 3000,
    top_p: 0.9,
    frequency_penalty: 0.2,
    presence_penalty: 0.0,
};

/** Section title -> prompt template; `[Service Name]` is a placeholder filled in later. */
const PROMPT_SECTIONS = {
    Introduction: 'Briefly introduce **[Service Name]**, highlighting features and benefits.',
    'Uses and Benefits': 'Explain the primary use cases and benefits of **[Service Name]** in Docker.',
    'Docker Setup': 'Provide a quick setup guide for **[Service Name]** in Docker, including tips for common issues.',
    'Security Essentials': 'Outline essential security measures for **[Service Name]** in Docker.',
};

/** Text placed before every section prompt. */
const PRE_PROMPT = `
Create a short, concise, beginner-friendly article subsection for **[Service Name]** in Docker.
Guidelines: Provide your response in markdown format, ensuring code snippets are formatted correctly.
`;

/** Text placed after every section prompt. */
const POST_PROMPT = `
Output Requirements: Keep language clear, concise, and use Markdown format throughout.
Respond solely with the information requested, do not include any urls, do not repeat the heading provided to you.
`;
/**
 * Sends one prompt to the local Ollama `/api/generate` endpoint and returns the generated text.
 *
 * @param {string} model - Ollama model tag, e.g. `qwen2.5:latest`.
 * @param {string} dockerImage - Value substituted for every `[Service Name]` placeholder in `prompt`.
 * @param {string} prompt - Prompt text; may contain `[Service Name]` placeholders.
 * @param {string} llmContext - Extra text appended under an "Additional Context:" heading.
 * @param {object} [options] - Sampling parameters (temperature, seed, top_p, ...). An OpenAI-style
 *   `max_tokens` key is translated to Ollama's `num_predict` unless `num_predict` is given explicitly.
 * @returns {Promise<string|null>} The generated text, or `null` if the request failed or the
 *   response text was empty.
 */
async function generateAIText(model, dockerImage, prompt, llmContext, options) {
    const ollamaUrl = 'http://localhost:11434/api/generate';
    // Global regex + function replacer: fills in every placeholder (a string pattern only replaces the
    // first one) and inserts the name literally, without `$&`-style pattern expansion.
    const resolvedPrompt = prompt.replace(/\[Service Name\]/g, () => dockerImage);
    const enhancedPrompt = `${resolvedPrompt}\n\nAdditional Context:\n${llmContext}`;

    // Ollama reads sampling parameters from the nested `options` object; keys such as `temperature`
    // placed at the top level of the request body are ignored.
    const { max_tokens: maxTokens, ...samplingOptions } = options ?? {};
    const ollamaOptions = maxTokens == null
        ? samplingOptions
        : { num_predict: maxTokens, ...samplingOptions };

    try {
        console.log(`Generating content for ${dockerImage} using model ${model}\nPrompt:\n${enhancedPrompt}`);
        const response = await axios.post(ollamaUrl, {
            model,
            prompt: enhancedPrompt,
            options: ollamaOptions,
            stream: false,
        });
        const generatedContent = response.data.response;
        console.log('Generated content:', generatedContent);
        console.log('__________________________________________________________________________________________');
        return generatedContent || null;
    } catch (error) {
        // Best effort: a failed request is logged and reported to the caller as `null`.
        console.error('Error generating content:', error);
        return null;
    }
}
/**
 * Builds a guide by generating one piece of text per entry in `PROMPT_SECTIONS` and concatenating
 * the results in section order.
 *
 * @param {string} model - Ollama model tag forwarded to `generateAIText`.
 * @param {string} dockerImage - Service name substituted for every `[Service Name]` placeholder.
 * @param {string} context - Additional context forwarded to `generateAIText`.
 * @param {object} options - Generation options forwarded to `generateAIText`.
 * @returns {Promise<string>} Generated sections, each preceded by a newline; sections that failed
 *   to generate are skipped, so the result is `''` when every section failed.
 */
async function generateGuide(model, dockerImage, context, options) {
    console.log(`Generating content for ${dockerImage} using model ${model}\ncontext:\n${context}`);
    const separator = '__________________________________________________________________________________________';
    let guideContent = '';
    for (const [section, prompt] of Object.entries(PROMPT_SECTIONS)) {
        const template = `${PRE_PROMPT} Section: ${section}\n${prompt}\n${POST_PROMPT}`;
        // Resolve every placeholder in the assembled prompt (pre-prompt included) instead of only the
        // first one in the section text; the function replacer inserts the name literally.
        const customizedPrompt = template.replace(/\[Service Name\]/g, () => dockerImage);
        const sectionContent = await generateAIText(model, dockerImage, customizedPrompt, context, options);
        if (sectionContent) {
            guideContent += `\n${sectionContent}`;
            console.log(`Generated content for section: ${section}`);
        } else {
            console.error(`Failed to generate content for section: ${section}`);
        }
        console.log(separator);
    }
    return guideContent;
}
/**
 * Queries the SearxNG instance at `SEARXNG_BASE_QUERY` and returns the higher-scoring results.
 *
 * @param {string} searchShebang - Bang prefix placed before the query (e.g. `!it`).
 * @param {string} query - Free-text search terms.
 * @returns {Promise<object[]>} Results whose `score` is greater than 0.5; an empty array when the
 *   request fails or the response carries no results array.
 */
async function searchSearxNG(searchShebang, query) {
    // encodeURIComponent rather than encodeURI: encodeURI leaves `&`, `#`, `+` and `?` untouched, so a
    // query such as "C# & Docker" would be cut off or split into extra URL parameters.
    const encodedQuery = encodeURIComponent(`${searchShebang} ${query}`);
    const queryString = `${SEARXNG_BASE_QUERY}${encodedQuery}`;
    try {
        const response = await axios.post(queryString);
        const results = Array.isArray(response.data?.results) ? response.data.results : [];
        console.log('Search results:', results[0]);
        return results.filter((result) => result.score > 0.5);
    } catch (error) {
        // Best effort: a failed search is logged and treated as "no results".
        console.error('Error fetching data:', error);
        return [];
    }
}
/**
 * Reads a file, parses its frontmatter and returns the value stored under one key.
 *
 * @param {string} inputFilePath - Path of the file to read (as UTF-8).
 * @param {string} frontMatterKey - Frontmatter key to look up.
 * @returns {*} The stored value, or `null` when the key is missing or its value is falsy.
 */
function getFrontMatterValue(inputFilePath, frontMatterKey) {
    const rawText = fs.readFileSync(inputFilePath, 'utf-8');
    const { data } = matter(rawText);
    const lookupKey = `${frontMatterKey}`;
    const storedValue = data[lookupKey];
    if (storedValue) {
        return storedValue;
    }
    return null;
}
/**
 * Downloads a page and returns the text of its `<body>` with `<script>` and `<style>` elements
 * removed and all whitespace runs collapsed to single spaces.
 *
 * @param {string} url - Address of the page to fetch.
 * @returns {Promise<string>} The cleaned page text, or `''` when fetching or parsing fails.
 */
async function scrapeMainWebsiteHomepage(url) {
    let cleanedPageContent;
    try {
        const { data: html } = await axios.get(url);
        const $ = cheerio.load(html);
        // Drop non-content elements before reading the text.
        for (const selector of ['script', 'style']) {
            $(selector).remove();
        }
        const bodyText = $('body').text().trim();
        cleanedPageContent = bodyText.replace(/\s+|\n/g, ' ');
        console.log('cleanedPageContent:', cleanedPageContent);
    } catch (error) {
        console.error('Error scraping content:', error);
        return '';
    }
    return cleanedPageContent;
}
/**
 * Fetches a GitHub page and returns the text found inside its `.markdown-body` element.
 *
 * Child nodes that contain an image served from `avatars.githubusercontent.com` are left out.
 *
 * @param {string} githubUrl - Address of the GitHub page to fetch.
 * @returns {Promise<string|null>} The README text with whitespace collapsed to single spaces,
 *   or `null` when fetching or parsing fails.
 */
async function scrapeGitHubReadme(githubUrl) {
    try {
        const { data: html } = await axios.get(githubUrl);
        const $ = cheerio.load(html);
        const textPieces = [];

        $('.markdown-body').contents().each((_position, node) => {
            const $node = $(node);
            const imageSource = $node.find('img').attr('src');
            const holdsAvatar = imageSource?.includes('avatars.githubusercontent.com');
            if (!holdsAvatar) {
                textPieces.push($node.text().trim());
            }
        });

        // One flat string: pieces joined by spaces, whitespace runs collapsed.
        const joined = textPieces.join(' ');
        return joined.replace(/\s+/g, ' ').trim();
    } catch (error) {
        console.error('Error scraping GitHub README:', error);
        return null;
    }
}
/**
 * Collects the tags of all search results whose URL points at Docker Hub or GitHub.
 *
 * @param {object[]} searchResults - Search results; each may carry `url` and `tags`.
 * @returns {string[]} Unique tags in first-seen order; empty when no matching result has tags.
 */
function generateTagsFromSearXNGResults(searchResults) {
    console.log('generateTagsFromSearXNGResults.searchResults:', searchResults);
    // A Set keeps insertion order and drops tags repeated across results.
    const uniqueTags = new Set();
    for (const result of searchResults) {
        const url = typeof result?.url === 'string' ? result.url : '';
        const isKnownSource = url.includes('hub.docker') || url.includes('github.com');
        // A matching result without a `tags` array is skipped; spreading a missing value would throw
        // and discard the tags of every other result as well.
        if (!isKnownSource || !Array.isArray(result.tags)) {
            continue;
        }
        for (const tag of result.tags) {
            uniqueTags.add(tag);
        }
    }
    return [...uniqueTags];
}
/**
 * Picks the `content` snippets of search results that point at Docker Hub or GitHub.
 *
 * @param {object[]} searchResults - Search results carrying `url` and `content`.
 * @returns {string[]} The non-empty `content` values of the matching results, in result order.
 */
function generateFrontMatterDescription(searchResults) {
    console.log('generateFrontMatterDescription.searchResults:', searchResults);
    const pointsAtKnownHost = ({ url }) => url.includes('hub.docker.com') || url.includes('github.com');
    const snippets = searchResults
        .filter(pointsAtKnownHost)
        .map((match) => match.content);
    return snippets.filter(Boolean);
}
/**
 * Turns a model tag into a lower-case name usable as a directory name.
 *
 * Every `:` and space becomes `_` and every `.` becomes `-`. Global regexes are used because
 * `replace` with a string pattern only substitutes the first occurrence.
 *
 * @param {string} modelName - Model tag such as `qwen2.5:latest`.
 * @returns {string} Sanitised name, e.g. `qwen2-5_latest`.
 */
const sanitiseModelNameForUseInFileName = (modelName) => modelName
    .replace(/[: ]/g, '_')
    .replace(/\./g, '-')
    .toLowerCase();
/**
 * Gathers tags, description snippets and LLM context text for one service.
 *
 * The project homepage is scraped first, then SearxNG is queried with the service title. When the
 * search returns nothing, or no tags can be derived from it, a placeholder result is returned and
 * the homepage text is not used. Otherwise the README of the first GitHub search hit (if any) is
 * scraped and combined with the homepage text.
 *
 * @param {string} projectUrl - Homepage of the service.
 * @param {string} title - Service title used as the search query.
 * @returns {Promise<{tags: Array, description: (Array|string), context: string}>} On success the
 *   derived tags, the description snippets and the combined context; on any failure path empty
 *   `tags`, `description: ''` and a short explanatory `context`.
 */
async function fetchServiceContext(projectUrl, title) {
    const placeholder = (context) => ({tags: [], description: '', context});

    try {
        const homepageContent = await scrapeMainWebsiteHomepage(projectUrl);
        const searchResults = await searchSearxNG(searchCategoryShebang, title);
        if (!searchResults.length) {
            console.error('No search results found for:', title);
            return placeholder('No searchResults available.');
        }

        const tags = generateTagsFromSearXNGResults(searchResults);
        if (!tags.length) {
            console.error('No tags found for:', title);
            return placeholder('No Tags available.');
        }

        const description = generateFrontMatterDescription(searchResults);

        const githubHit = searchResults.find((hit) => hit.url.includes('github.com'));
        const githubUrl = githubHit?.url;
        let githubReadme = null;
        if (githubUrl) {
            githubReadme = await scrapeGitHubReadme(githubUrl);
        }

        // Assemble whichever sources produced text; fall back to a fixed notice when none did.
        const contextParts = [];
        if (homepageContent) {
            contextParts.push(`App website:\n${homepageContent}`);
        }
        if (githubReadme) {
            contextParts.push(`App GitHub README: ${githubReadme}`);
        }
        const context = contextParts.length > 0
            ? contextParts.join('\n')
            : 'No additional context available.';

        return {tags, description, context};
    } catch (error) {
        console.error('Error fetching service context:', error);
        return placeholder('No context available.');
    }
}
/**
 * Entry point: for every model in `OLLAMA_MODELS` and every file in `INPUT_DIRECTORY`, generates a
 * guide and writes it, together with extended frontmatter, to `OUTPUT_DIRECTORY/<model>/<file>`.
 *
 * Files whose frontmatter has `iconName: default` or no `dockerImage` are skipped, as are files
 * for which no content could be generated. Models form the outer loop, so each model processes
 * the whole directory before the next one starts.
 *
 * @returns {Promise<void>} Resolves when all models have been processed; rejects on file-system
 *   errors, which are not caught here.
 */
const main = async () => {
    const files = fs.readdirSync(INPUT_DIRECTORY);
    // Search/scrape results do not depend on the model, so successful lookups are reused for the
    // following models instead of hitting the network again for the same service.
    const contextCache = new Map();

    for (const model of OLLAMA_MODELS) {
        const sanitizedModelName = sanitiseModelNameForUseInFileName(model);
        const modelOutputDir = `${OUTPUT_DIRECTORY}/${sanitizedModelName}`;
        // `recursive: true` makes this a no-op when the directory already exists.
        fs.mkdirSync(modelOutputDir, {recursive: true});

        for (const file of files) {
            const inputFilePath = `${INPUT_DIRECTORY}/${file}`;
            const outputFilePath = `${modelOutputDir}/${file}`;

            // Read and parse the file once; every frontmatter value below comes from this parse.
            const frontmatter = matter(fs.readFileSync(inputFilePath, 'utf-8'));
            const {dockerImage, iconName, projectUrl, title} = frontmatter.data;

            if (iconName === 'default') {
                console.error(`Skipping ${dockerImage || file} as it has default icon.`);
                continue;
            }
            if (!dockerImage) {
                console.error(`No dockerImage found for ${file}`);
                continue;
            }

            let serviceContext = contextCache.get(file);
            if (!serviceContext) {
                serviceContext = await fetchServiceContext(projectUrl, title);
                // Failed lookups come back with empty tags; leaving them uncached means the next
                // model retries them.
                if (serviceContext.tags.length > 0) {
                    contextCache.set(file, serviceContext);
                }
            }
            const {tags, description, context} = serviceContext;

            // Only the LLM generation itself is timed, not the context gathering.
            const startTime = Date.now();
            const articleContent = await generateGuide(model, dockerImage, context, OPTIONS);
            const generationTime = (Date.now() - startTime) / 1000;

            if (!articleContent) {
                console.error(`Failed to generate content for ${dockerImage} with ${model}`);
                continue;
            }

            const newFrontmatter = {
                ...frontmatter.data,
                description,
                tags,
                model,
                isAI: true,
                generationTime,
            };
            const finalContent = matter.stringify('', newFrontmatter) + articleContent;
            fs.writeFileSync(outputFilePath, finalContent, 'utf-8');
            console.log(`Generated and saved content for ${dockerImage} with ${model} in ${generationTime}s.`);
        }
    }
};
main().then(() => console.log('Done!')).catch(console.error);