random fixes

This commit is contained in:
GitBluub
2023-02-25 11:30:59 +09:00
committed by Clément Le Bihan
parent 45c07b68a0
commit 7edc5b75ef
2 changed files with 58 additions and 51 deletions
+10 -10
View File
@@ -3,16 +3,16 @@ import { router } from './routes.js';
// PlaywrightCrawler crawls the web using a headless
// browser controlled by the Playwright library.
//
// NOTE(review): this span is taken from a rendered diff, so the option lines
// appear twice: the pre-change lines first, then the post-change lines. If the
// span is evaluated as a single object literal, the later copy of each
// repeated key is the one that takes effect.
const crawler = new PlaywrightCrawler({
// Pre-change copy of the options.
// NOTE(review): userDataDir is a hard-coded, user-specific absolute path;
// presumably it reuses an existing Chromium profile - confirm, and consider
// making it configurable.
launchContext: {
userDataDir: "/home/bluub/.config/chromium",
},
// Limit the crawler to one concurrent request.
maxConcurrency: 1,
// Requests are dispatched through the router imported from ./routes.js.
requestHandler: router,
// This function is called if the page processing failed more than maxRequestRetries+1 times.
// It only logs the failing URL at info level.
failedRequestHandler({ request, log }) {
log.info(`Request ${request.url} failed too many times.`);
},
// headless: false,
// Post-change copy of the options. The values match the copy above, except
// that headless is now set explicitly to true instead of being commented out.
launchContext: {
userDataDir: "/home/bluub/.config/chromium",
},
maxConcurrency: 1,
requestHandler: router,
// This function is called if the page processing failed more than maxRequestRetries+1 times.
failedRequestHandler({ request, log }) {
log.info(`Request ${request.url} failed too many times.`);
},
headless: true,
});
// Add first URL to the queue and start the crawl.
+48 -41
View File
@@ -1,53 +1,57 @@
import { Dataset, createPlaywrightRouter } from 'crawlee';
import * as fs from 'fs';
import { sleep } from 'crawlee';
export const router = createPlaywrightRouter();
// Default route: enqueues song links (label SONG) and further listing links
// (label LIST) found on the current page.
//
// NOTE(review): this span is taken from a rendered diff, so both enqueueLinks
// calls appear twice (pre-change lines first, then post-change lines). Read as
// plain JavaScript, songs and lists are each declared twice in one scope.
// NOTE(review): the values returned by enqueueLinks are stored in songs and
// lists but never read in the visible code.
// NOTE(review): no handler for the LIST label is visible in this view -
// confirm how LIST requests are routed.
router.addDefaultHandler(async ({ enqueueLinks }) => {
// Pre-change copy.
// Enqueue every link matching the song selector under the SONG label.
const songs = await enqueueLinks({
selector: 'article a.xrntp',
label: 'SONG',
});
// Find a link to the next page and enqueue it if it exists.
const lists = await enqueueLinks({
selector: '.VECGt',
label: 'LIST',
});
// Post-change copy (same calls, same selectors and labels).
const songs = await enqueueLinks({
selector: 'article a.xrntp',
label: 'SONG',
});
// Find a link to the next page and enqueue it if it exists.
const lists = await enqueueLinks({
selector: '.VECGt',
label: 'LIST',
});
});
// Handler for requests labelled SONG. In the visible code it:
//   1. pushes the loaded URL to the Dataset,
//   2. reads the title, artist and genre text from the page,
//   3. opens the download dialog and saves one download as <title>.mxl,
//   4. reopens the dialog and saves a second download as <title>.midi,
//   5. writes a <title>.ini file whose template starts with a [Metadata]
//      section containing Name and Artist.
//
// NOTE(review): this span is taken from a rendered diff. Pre-change and
// post-change lines are interleaved without markers, and a hunk header in the
// middle of the .ini template hides part of that template, so the handler is
// not complete in this view.
// NOTE(review): title comes straight from the page and is used unsanitised in
// file paths; a title containing a path separator would change where the files
// are written - consider sanitising it.
// NOTE(review): the final sleep call is given 10_000_000 and the log line says
// 10k seconds, so the handler presumably stays open for hours - confirm that
// this is intended.
router.addHandler('SONG', async ({ request, page }) => {
// ---- Pre-change copy: metadata scrape and first (.mxl) download ----
await Dataset.pushData({ url: request.loadedUrl });
// Wait until the download button is present before reading the page.
await page.waitForSelector('aside div div section button[name="download"]');
const title = await page.locator('h1').textContent()
// const artist = 'a';
const artist = await page.locator('body > div.js-page.react-container > div > section > aside > div:nth-child(5) > div > section > h3:nth-child(2) > a').textContent()
//const genre = 'b';
const genre = await page.locator('body > div.js-page.react-container > div > section > aside > div:nth-child(6) > div > table > tbody > tr:nth-child(5) > td > div > a').textContent()
await page.locator('aside div div section button[name="download"]').click()
await page.waitForSelector('section.b_r17 button');
const [ downloadMxl ] = await Promise.all([
// Start waiting for the download
page.waitForEvent('download'),
// Perform the action that initiates download
page.locator('section.b_r17 section section div:nth-child(3) button').click(),
]);
// Save downloaded file somewhere
await downloadMxl.saveAs(`../musics/a/${title}/${title}.mxl`);
// ---- Post-change copy: same steps, with .first() added to the artist and
// genre locators and progress messages logged to the console ----
await Dataset.pushData({ url: request.loadedUrl });
await page.waitForSelector('aside div div section button[name="download"]');
const title = await page.locator('h1').textContent()
// const artist = 'a';
const artist = await page.locator('body > div.js-page.react-container > div > section > aside > div:nth-child(5) > div > section > h3:nth-child(2) > a').first().textContent()
//const genre = 'b';
const genre = await page.locator('body > div.js-page.react-container > div > section > aside > div:nth-child(6) > div > table > tbody > tr:nth-child(5) > td > div > a').first().textContent()
console.log("new song", title, artist, genre)
await page.locator('aside div div section button[name="download"]').click()
await page.waitForSelector('section.b_r17 button');
console.log("downloading Mxl")
const [downloadMxl] = await Promise.all([
// Start waiting for the download
page.waitForEvent('download'),
// Perform the action that initiates download
page.locator('section.b_r17 section section div:nth-child(3) button').click(),
]);
// Save downloaded file somewhere
await downloadMxl.saveAs(`../musics/a/${title}/${title}.mxl`);
// Presumably dismisses the download dialog before it is reopened - confirm.
// The click is listed twice because both revisions of the line are shown.
await page.locator('body > article > section > button').click();
await page.locator('body > article > section > button').click();
// ---- Pre-change copy: pause 15 s, then second (.midi) download ----
await page.waitForTimeout(15000);
await page.locator('aside div div section button[name="download"]').click()
await page.waitForSelector('section.b_r17 button');
const [ downloadMidi ] = await Promise.all([
// Start waiting for the download
page.waitForEvent('download'),
// Perform the action that initiates download
page.locator('section.b_r17 section section div:nth-child(4) button').click(),
]);
// Save downloaded file somewhere
await downloadMidi.saveAs(`../musics/a/${title}/${title}.midi`);
// ---- Post-change copy: pause shortened to 1 s, then the same download ----
await page.waitForTimeout(1000);
await page.locator('aside div div section button[name="download"]').click()
await page.waitForSelector('section.b_r17 button');
console.log("downloading Midi")
const [downloadMidi] = await Promise.all([
// Start waiting for the download
page.waitForEvent('download'),
// Perform the action that initiates download
page.locator('section.b_r17 section section div:nth-child(4) button').click(),
]);
// Save downloaded file somewhere
await downloadMidi.saveAs(`../musics/a/${title}/${title}.midi`);
// Write the .ini metadata file. Both revisions of the opening line are shown,
// and the template body is cut by the hunk header further down.
// NOTE(review): the callback given to fs.writeFile ignores its error argument
// and the call is not awaited, so a failed write goes unnoticed.
fs.writeFile(`../musics/a/${title}/${title}.ini`, `
fs.writeFile(`../musics/a/${title}/${title}.ini`, `
[Metadata]
Name=${title}
Artist=${artist}
@@ -68,6 +72,9 @@ ChordTiming=0
Length=0
PedalPoint=0
Precision=0
`, () => {})
await page.waitForTimeout(15000);
`, () => { })
console.log("done downloading")
console.log("sleeping for 10k seconds")
await sleep(10_000_000);
});