Get links from source page

This commit is contained in:
karl.hudgell 2022-06-09 10:06:22 +01:00
parent 711b3412bc
commit 419031d65d
7 changed files with 102 additions and 50 deletions

1
.gitignore vendored
View File

@ -1,4 +1,3 @@
.env
node_modules node_modules
releasebb.json releasebb.json
config.json config.json

53
FeedFilter.js Normal file
View File

@ -0,0 +1,53 @@
const fs = require('fs')
const { linkAdder } = require('./JDLinkAdder');
const { getLinksFromURL } = require('./LinkGrabber')
const log = require('simple-node-logger').createSimpleLogger('jdrssdownloader.log');
async function filterFeed(fileName) {
let myshowlist = JSON.parse(fs.readFileSync('config.json')).Shows
let feed = JSON.parse(fs.readFileSync(fileName));
myshowlist.forEach(async show => {
try {
// Find show on feed
let list_filtered_for_show = feed.filter(item => item.title.includes(show.Name))
if (list_filtered_for_show.length > 0) {
// If show is found get url then return all links on that page
let full_link_list_from_page = await getLinksFromURL(list_filtered_for_show[0].link)
// Only get urls with HEVC in name
let urls_with_HEVC_in_url = full_link_list_from_page.filter(item => item.includes('HEVC'))
if (urls_with_HEVC_in_url.length == 0) {
// If no urls with HEVC check for H265
urls_with_HEVC_in_url = full_link_list_from_page.filter(item => item.includes('H265'))
}
// Only keep urls that match show quality
let urls_with_quality_in_url = urls_with_HEVC_in_url.filter(item => item.includes(show.Quality))
// Remove any url trying to direct to a torrent site search
let urls_without_torrent_in_url = urls_with_quality_in_url.filter(item => !item.includes('torrent'))
// Remove any url that doesn't include MeGusta
let only_MeGusta_links = urls_without_torrent_in_url.filter(item => item.includes('MeGusta'))
// NitroFlare doesn't group with the rest of the links in JD, remove them.
let remove_nitroflare = only_MeGusta_links.filter(item => !item.includes('nitro'))
// Send Links to JDdownloader
if (remove_nitroflare.length !== 0) {
log.info(remove_nitroflare.length + ' links for ' + show.Name + ' have been sent to JDdownloader')
linkAdder(remove_nitroflare)
} else {
// No HEVC links found
log.info(remove_nitroflare.length + ' HEVC links for ' + show.Name + ' have been found')
}
} else {
// Show not found on the current feed cache
log.info(show.Name + ' not on feed')
}
} catch (error) {
log.error('Something went wrong ' + error)
}
})
}
module.exports = {
filterFeed
}

26
LinkGrabber.js Normal file
View File

@ -0,0 +1,26 @@
const axios = require('axios');
var cheerio = require('cheerio');
async function getLinksFromURL(url) {
try {
let links = [];
let httpResponse = await axios.get(url);
let $ = cheerio.load(httpResponse.data);
let linkObjects = $('a'); // get all hyperlinks
linkObjects.each((index, element) => {
links.push(
$(element).attr('href'), // get the href attribute
);
});
return links;
} catch (e) { console.log(e) }
}
module.exports = {
getLinksFromURL
}

29
feed.js
View File

@ -1,29 +0,0 @@
const extractUrls = require("extract-urls");
const fs = require('fs')
const { linkAdder } = require('./jd-link-adder');
const log = require('simple-node-logger').createSimpleLogger('jdrssdownloader.log');
function filterFeed(fileName) {
let myshowlist = JSON.parse(fs.readFileSync('config.json')).Shows
let feed = JSON.parse(fs.readFileSync(fileName));
myshowlist.forEach(show => {
try {
let list_filtered_for_show = feed.filter(item => item.title.includes(show.Name))
let extracted_urls_for_show = extractUrls(list_filtered_for_show[0]["content:encoded"]);
let urls_with_HEVC_in_url = extracted_urls_for_show.filter(item => item.includes('HEVC') || item.includes('H265'))
let urls_with_quality_in_url = urls_with_HEVC_in_url.filter(item => item.includes(show.Quality))
let urls_without_torrent_in_url = urls_with_quality_in_url.filter(item => !item.includes('torrent'))
log.info(show.Name + ' - ' + urls_without_torrent_in_url)
linkAdder(urls_without_torrent_in_url)
// console.log(urls_without_torrent_in_url)
} catch (error) {
log.info(show.Name + ' not on feed')
}
})
}
module.exports = {
filterFeed
}

39
main.js
View File

@ -1,33 +1,34 @@
// Import dependencies
// NOTE(review): this span was a side-by-side diff collapsed into duplicated
// lines; reconstructed here as the new-side (post-commit) code.
const fs = require("fs");
const Parser = require("rss-parser");
const { filterFeed } = require("./FeedFilter");
const lodash = require('lodash');
const log = require('simple-node-logger').createSimpleLogger('jdrssdownloader.log');

(async function main() {
    // Make a new RSS Parser
    const parser = new Parser();
    // Get all the items in the RSS feed
    const feed = await parser.parseURL(JSON.parse(fs.readFileSync('config.json')).RSSFeed);
    let items = [];
    // Clean up the string and replace reserved characters
    const fileName = `${feed.title.replace(/\s+/g, "-").replace(/[/\\?%*:|"<>]/g, '').toLowerCase()}.json`;
    if (fs.existsSync(fileName)) {
        items = require(`./${fileName}`);
    }
    // Compare existing cache and new items and merge differences
    let updatedArray = lodash.unionBy(feed.items, items, 'title');
    // Save the file
    log.info(updatedArray.length + ' items in file cache')
    fs.writeFileSync(fileName, JSON.stringify(updatedArray));
    // run next part
    filterFeed(fileName)
})();

View File

@ -9,6 +9,8 @@
"author": "Karl0ss", "author": "Karl0ss",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"axios": "^0.27.2",
"cheerio": "^1.0.0-rc.11",
"extract-urls": "^1.3.2", "extract-urls": "^1.3.2",
"jdownloader-client": "^1.0.0", "jdownloader-client": "^1.0.0",
"lodash": "^4.17.21", "lodash": "^4.17.21",