crawler piano only
This commit is contained in:
committed by
Clément Le Bihan
parent
3335516f0e
commit
9b12c76978
@@ -1,5 +1,5 @@
|
||||
import { PlaywrightCrawler, Dataset } from 'crawlee';
|
||||
import { router } from './routes.js';
|
||||
import { PlaywrightCrawler, Dataset } from "crawlee";
|
||||
import { router } from "./routes.js";
|
||||
// PlaywrightCrawler crawls the web using a headless
|
||||
// browser controlled by the Playwright library.
|
||||
const crawler = new PlaywrightCrawler({
|
||||
@@ -16,4 +16,6 @@ const crawler = new PlaywrightCrawler({
|
||||
});
|
||||
|
||||
// Add the first URL to the queue and start the crawl.
|
||||
await crawler.run(['https://musescore.com/sheetmusic?license=to_modify_commercially%2Cto_use_commercially&recording_type=public-domain']);
|
||||
await crawler.run([
|
||||
"https://musescore.com/sheetmusic?complexity=1&instrument=2&license=to_modify_commercially%2Cto_use_commercially&recording_type=public-domain",
|
||||
]);
|
||||
|
||||
@@ -1,57 +1,70 @@
|
||||
import { Dataset, createPlaywrightRouter } from 'crawlee';
|
||||
import * as fs from 'fs';
|
||||
import { sleep } from 'crawlee';
|
||||
import { Dataset, createPlaywrightRouter } from "crawlee";
|
||||
import * as fs from "fs";
|
||||
import { sleep } from "crawlee";
|
||||
export const router = createPlaywrightRouter();
|
||||
|
||||
router.addDefaultHandler(async ({ enqueueLinks }) => {
|
||||
const songs = await enqueueLinks({
|
||||
selector: 'article a.xrntp',
|
||||
label: 'SONG',
|
||||
selector: "article a.xrntp",
|
||||
label: "SONG",
|
||||
});
|
||||
// Find a link to the next page and enqueue it if it exists.
|
||||
const lists = await enqueueLinks({
|
||||
selector: '.VECGt',
|
||||
label: 'LIST',
|
||||
selector: ".VECGt",
|
||||
label: "LIST",
|
||||
});
|
||||
});
|
||||
|
||||
router.addHandler('SONG', async ({ request, page }) => {
|
||||
router.addHandler("SONG", async ({ request, page }) => {
|
||||
await Dataset.pushData({ url: request.loadedUrl });
|
||||
await page.waitForSelector('aside div div section button[name="download"]');
|
||||
const title = await page.locator('h1').textContent()
|
||||
// const artist = 'a';
|
||||
const artist = await page.locator('body > div.js-page.react-container > div > section > aside > div:nth-child(5) > div > section > h3:nth-child(2) > a').first().textContent()
|
||||
//const genre = 'b';
|
||||
const genres = await page.locator('body > div.js-page.react-container > div > section > aside > div:nth-child(6) > div > table > tbody > tr:nth-child(5) > td > div > a').allTextContents()
|
||||
console.log("new song", title, artist, genres)
|
||||
await page.locator('aside div div section button[name="download"]').click()
|
||||
await page.waitForSelector('section.b_r17 button');
|
||||
console.log("downloading Mxl")
|
||||
const title = await page.locator("h1").textContent();
|
||||
const artist = await page
|
||||
.locator(
|
||||
"body > div.js-page.react-container > div > section > aside > div:nth-child(5) > div > section > h3:nth-child(2) > a"
|
||||
)
|
||||
.first()
|
||||
.textContent();
|
||||
const genres = await page
|
||||
.locator(
|
||||
"body > div.js-page.react-container > div > section > aside > div:nth-child(6) > div > table > tbody > tr:nth-child(5) > td > div > a"
|
||||
)
|
||||
.allTextContents();
|
||||
console.log("new song", title, artist, genres);
|
||||
await page.locator('aside div div section button[name="download"]').click();
|
||||
await page.waitForSelector("section.b_r17 button");
|
||||
console.log("downloading Mxl");
|
||||
const [downloadMxl] = await Promise.all([
|
||||
// Start waiting for the download
|
||||
page.waitForEvent('download'),
|
||||
page.waitForEvent("download"),
|
||||
// Perform the action that initiates download
|
||||
page.locator('section.b_r17 section section div:nth-child(3) button').click(),
|
||||
page
|
||||
.locator("section.b_r17 section section div:nth-child(3) button")
|
||||
.click(),
|
||||
]);
|
||||
// Save the downloaded MXL file into a folder named after the song title
|
||||
await downloadMxl.saveAs(`../musics/a/${title}/${title}.mxl`);
|
||||
|
||||
await page.locator('body > article > section > button').click();
|
||||
await page.locator("body > article > section > button").click();
|
||||
|
||||
await page.waitForTimeout(1000);
|
||||
await page.locator('aside div div section button[name="download"]').click()
|
||||
await page.waitForSelector('section.b_r17 button');
|
||||
console.log("downloading Midi")
|
||||
await page.locator('aside div div section button[name="download"]').click();
|
||||
await page.waitForSelector("section.b_r17 button");
|
||||
console.log("downloading Midi");
|
||||
const [downloadMidi] = await Promise.all([
|
||||
// Start waiting for the download
|
||||
page.waitForEvent('download'),
|
||||
page.waitForEvent("download"),
|
||||
// Perform the action that initiates download
|
||||
page.locator('section.b_r17 section section div:nth-child(4) button').click(),
|
||||
page
|
||||
.locator("section.b_r17 section section div:nth-child(4) button")
|
||||
.click(),
|
||||
]);
|
||||
// Save the downloaded MIDI file into a folder named after the song title
|
||||
await downloadMidi.saveAs(`../musics/a/${title}/${title}.midi`);
|
||||
|
||||
fs.writeFile(`../musics/a/${title}/${title}.ini`, `
|
||||
fs.writeFile(
|
||||
`../musics/a/${title}/${title}.ini`,
|
||||
`
|
||||
[Metadata]
|
||||
Name=${title}
|
||||
Artist=${artist}
|
||||
@@ -72,9 +85,11 @@ ChordTiming=0
|
||||
Length=0
|
||||
PedalPoint=0
|
||||
Precision=0
|
||||
`, () => { })
|
||||
console.log("done downloading")
|
||||
`,
|
||||
() => { }
|
||||
);
|
||||
console.log("done downloading");
|
||||
|
||||
console.log("sleeping for 10k seconds")
|
||||
//console.log("sleeping for 10k seconds")
|
||||
//await sleep(10_000_000);
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user