User:Polygnotus/Scripts/GetContext.js

This is an old revision of this page, as edited by Polygnotus (talk | contribs) at 23:07, 8 September 2024 (get context). The present address (URL) is a permanent link to this revision, which may differ significantly from the current revision.
(diff) ← Previous revision | Latest revision (diff) | Newer revision → (diff)
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
/**
 * Fetch the rendered text of an English Wikipedia article and return the
 * words surrounding the n-th occurrence of a word.
 *
 * The article HTML is retrieved through the MediaWiki `action=parse` API and
 * reduced to plain text by replacing tags with spaces. This is a regex strip,
 * not an HTML parser: character entities are left undecoded and the contents
 * of elements such as <style> are kept as text.
 *
 * @param {string} articleName - Title of the article to fetch.
 * @param {string} word - Word to look for. It is matched literally (regex
 *     metacharacters are escaped), case-insensitively, between `\b` word
 *     boundaries.
 * @param {number} n - 1-based occurrence to locate.
 * @returns {Promise<string>} Resolves with up to 50 whitespace-separated
 *     words before the word containing the match, that word, and up to 50
 *     words after it, joined by single spaces.
 *     Rejects with a plain string (kept as a string, not an Error, so that
 *     existing callers keep receiving the same kind of value): either
 *     `Could not find the ${n}th occurrence of "${word}"` or a message
 *     starting with `Error fetching Wikipedia content: `.
 */
function getWikipediaContext(articleName, word, n) {
    // Number of words kept on each side of the word containing the match.
    const CONTEXT_WORDS = 50;

    // Starting from a resolved promise turns a synchronous failure while
    // building the URL (e.g. a malformed title) into a rejection, too.
    return Promise.resolve()
        .then(() => {
            const url = `https://en.wikipedia.org/w/api.php?action=parse&page=${encodeURIComponent(articleName)}&format=json&prop=text&origin=*`;
            return fetch(url);
        })
        .then(response => {
            if (!response.ok) {
                throw new Error(`HTTP ${response.status}`);
            }
            return response.json();
        })
        .then(data => {
            // The API reports problems such as a missing page in an `error`
            // object instead of a `parse` object.
            if (data.error) {
                throw new Error(data.error.info || data.error.code);
            }
            if (!data.parse || !data.parse.text || typeof data.parse.text['*'] !== 'string') {
                throw new Error('Unexpected API response: no parsed text');
            }
            return data.parse.text['*'];
        })
        .catch(error => Promise.reject(`Error fetching Wikipedia content: ${error}`))
        .then(htmlContent => {
            // After this normalisation the text contains only single spaces
            // as whitespace, with none at either end.
            const textContent = htmlContent.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();

            // Escape regex metacharacters so the word is matched literally.
            const escapedWord = String(word).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            const wordRegex = new RegExp(`\\b${escapedWord}\\b`, 'gi');

            let index = -1;
            let count = 0;
            let match;
            while (count < n && (match = wordRegex.exec(textContent)) !== null) {
                count++;
                if (count === n) {
                    index = match.index;
                }
            }

            if (index === -1) {
                return Promise.reject(`Could not find the ${n}th occurrence of "${word}"`);
            }

            // Words are separated by exactly one space, so the number of
            // spaces before the match offset is the index of the word that
            // contains the match.
            const words = textContent.split(' ');
            const wordIndex = textContent.slice(0, index).split(' ').length - 1;

            const start = Math.max(0, wordIndex - CONTEXT_WORDS);
            const end = Math.min(words.length, wordIndex + CONTEXT_WORDS + 1);

            return words.slice(start, end).join(' ');
        });
}

// Demo run: print the words around the third occurrence of "trilobite" in the
// "Horseshoe crab" article, or the rejection reason if the lookup fails.
getWikipediaContext('Horseshoe crab', 'trilobite', 3).then(
    context => {
        console.log("Result:");
        console.log(context);
    }
).catch(
    reason => {
        console.error("Error:");
        console.error(reason);
    }
);