Get links from source page

This commit is contained in:
karl.hudgell 2022-06-09 10:06:22 +01:00
parent 711b3412bc
commit 419031d65d
7 changed files with 102 additions and 50 deletions

1
.gitignore vendored
View File

@ -1,4 +1,3 @@
.env
node_modules node_modules
releasebb.json releasebb.json
config.json config.json

53
FeedFilter.js Normal file
View File

@ -0,0 +1,53 @@
const fs = require('fs')
const { linkAdder } = require('./JDLinkAdder');
const { getLinksFromURL } = require('./LinkGrabber')
const log = require('simple-node-logger').createSimpleLogger('jdrssdownloader.log');
async function filterFeed(fileName) {
let myshowlist = JSON.parse(fs.readFileSync('config.json')).Shows
let feed = JSON.parse(fs.readFileSync(fileName));
myshowlist.forEach(async show => {
try {
// Find show on feed
let list_filtered_for_show = feed.filter(item => item.title.includes(show.Name))
if (list_filtered_for_show.length > 0) {
// If show is found get url then return all links on that page
let full_link_list_from_page = await getLinksFromURL(list_filtered_for_show[0].link)
// Only get urls with HEVC in name
let urls_with_HEVC_in_url = full_link_list_from_page.filter(item => item.includes('HEVC'))
if (urls_with_HEVC_in_url.length == 0) {
// If no urls with HEVC check for H265
urls_with_HEVC_in_url = full_link_list_from_page.filter(item => item.includes('H265'))
}
// Only keep urls that match show quality
let urls_with_quality_in_url = urls_with_HEVC_in_url.filter(item => item.includes(show.Quality))
// Remove any url trying to direct to a torrent site search
let urls_without_torrent_in_url = urls_with_quality_in_url.filter(item => !item.includes('torrent'))
// Remove any url that doesn't include MeGusta
let only_MeGusta_links = urls_without_torrent_in_url.filter(item => item.includes('MeGusta'))
// NitroFlare doesn't group with the rest of the links in JD, remove them.
let remove_nitroflare = only_MeGusta_links.filter(item => !item.includes('nitro'))
// Send Links to JDdownloader
if (remove_nitroflare.length !== 0) {
log.info(remove_nitroflare.length + ' links for ' + show.Name + ' have been sent to JDdownloader')
linkAdder(remove_nitroflare)
} else {
// No HEVC links found
log.info(remove_nitroflare.length + ' HEVC links for ' + show.Name + ' have been found')
}
} else {
// Show not found on the current feed cache
log.info(show.Name + ' not on feed')
}
} catch (error) {
log.error('Something went wrong ' + error)
}
})
}
module.exports = {
filterFeed
}

26
LinkGrabber.js Normal file
View File

@ -0,0 +1,26 @@
const axios = require('axios');
var cheerio = require('cheerio');
async function getLinksFromURL(url) {
try {
let links = [];
let httpResponse = await axios.get(url);
let $ = cheerio.load(httpResponse.data);
let linkObjects = $('a'); // get all hyperlinks
linkObjects.each((index, element) => {
links.push(
$(element).attr('href'), // get the href attribute
);
});
return links;
} catch (e) { console.log(e) }
}
module.exports = {
getLinksFromURL
}

29
feed.js
View File

@ -1,29 +0,0 @@
const extractUrls = require("extract-urls");
const fs = require('fs')
const { linkAdder } = require('./jd-link-adder');
const log = require('simple-node-logger').createSimpleLogger('jdrssdownloader.log');
function filterFeed(fileName) {
let myshowlist = JSON.parse(fs.readFileSync('config.json')).Shows
let feed = JSON.parse(fs.readFileSync(fileName));
myshowlist.forEach(show => {
try {
let list_filtered_for_show = feed.filter(item => item.title.includes(show.Name))
let extracted_urls_for_show = extractUrls(list_filtered_for_show[0]["content:encoded"]);
let urls_with_HEVC_in_url = extracted_urls_for_show.filter(item => item.includes('HEVC') || item.includes('H265'))
let urls_with_quality_in_url = urls_with_HEVC_in_url.filter(item => item.includes(show.Quality))
let urls_without_torrent_in_url = urls_with_quality_in_url.filter(item => !item.includes('torrent'))
log.info(show.Name + ' - ' + urls_without_torrent_in_url)
linkAdder(urls_without_torrent_in_url)
// console.log(urls_without_torrent_in_url)
} catch (error) {
log.info(show.Name + ' not on feed')
}
})
}
module.exports = {
filterFeed
}

39
main.js
View File

@ -1,33 +1,34 @@
// Import dependencies
// NOTE(review): this span was a side-by-side diff collapsed into duplicated
// lines; reconstructed here as the new-side (post-commit) code.
const fs = require("fs");
const Parser = require("rss-parser");
const { filterFeed } = require("./FeedFilter");
const lodash = require('lodash');
const log = require('simple-node-logger').createSimpleLogger('jdrssdownloader.log');

(async function main() {
    // Make a new RSS Parser
    const parser = new Parser();
    // Get all the items in the RSS feed
    const feed = await parser.parseURL(JSON.parse(fs.readFileSync('config.json')).RSSFeed);
    let items = [];
    // Clean up the string and replace reserved characters
    const fileName = `${feed.title.replace(/\s+/g, "-").replace(/[/\\?%*:|"<>]/g, '').toLowerCase()}.json`;
    if (fs.existsSync(fileName)) {
        items = require(`./${fileName}`);
    }
    // Compare existing cache and new items and merge differences
    let updatedArray = lodash.unionBy(feed.items, items, 'title');
    // Save the file
    log.info(updatedArray.length + ' items in file cache')
    fs.writeFileSync(fileName, JSON.stringify(updatedArray));
    // run next part
    filterFeed(fileName)
})();

View File

@ -9,6 +9,8 @@
"author": "Karl0ss", "author": "Karl0ss",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"axios": "^0.27.2",
"cheerio": "^1.0.0-rc.11",
"extract-urls": "^1.3.2", "extract-urls": "^1.3.2",
"jdownloader-client": "^1.0.0", "jdownloader-client": "^1.0.0",
"lodash": "^4.17.21", "lodash": "^4.17.21",