2025-04-30 05:37:51

颜值精选官 on Nostr: a Python script that scrapes the newest image gallery from v2.jk.rs and reposts the images to Nostr every four hours.

```
import asyncio
import re
import urllib.parse

import requests
from bs4 import BeautifulSoup

from nostr_poster import post_to_nostr

def extract_redirect_url_from_onclick(onclick_attr):
    """
    Extract the URL from a JavaScript onclick attribute.
    Example: onclick="location.href='https://v2.jk.rs/2022/12/22/275.html';"
    """
    if not onclick_attr:
        return None

    # Match the location.href='URL' pattern
    pattern = r"location\.href='(https?://[^']+)'"
    match = re.search(pattern, onclick_attr)
    if match:
        return match.group(1)

    return None

def extract_images(url):
    """
    Extract the page title and all gallery image URLs from the given webpage.
    Returns a (title, image_urls) tuple.
    """
    # Send HTTP request to the webpage
    print(f"Fetching webpage: {url}")
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()  # Raise exception for 4XX/5XX responses
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the webpage: {e}")
        return "", []  # Keep the (title, urls) shape so the caller can unpack it

    # Parse HTML content
    soup = BeautifulSoup(response.text, 'html.parser')
    title = soup.title.string.strip() if soup.title and soup.title.string else "无题"  # "无题" means "Untitled"
    # Only keep the part of the title before the first dash
    title = title.split('-')[0].strip()

    # Find gallery divs with data-src attributes
    gallery_divs = soup.find_all('div', attrs={'data-fancybox': 'gallery'})
    print(f"Found {len(gallery_divs)} gallery divs")

    # Collect the image URL from each gallery div
    image_urls = []
    for div in gallery_divs:
        data_src = div.get('data-src')
        if data_src:
            # Resolve relative URLs against the page URL
            if not data_src.startswith('http'):
                data_src = urllib.parse.urljoin(url, data_src)
            image_urls.append(data_src)

    # Remove duplicates while preserving order
    unique_urls = list(dict.fromkeys(image_urls))

    print(f"Found {len(unique_urls)} unique image URLs on the webpage")
    return title, unique_urls

def find_article_url(base_url):
    """
    Find the first article link on the base website that uses a JavaScript
    onclick redirect. Returns an empty string if none is found.
    """
    print(f"Looking for article links on: {base_url}")
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36'
    }

    try:
        response = requests.get(base_url, headers=headers, timeout=30)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the base webpage: {e}")
        return ""

    soup = BeautifulSoup(response.text, 'html.parser')

    # Check every anchor tag for a JavaScript onclick redirect
    for anchor in soup.find_all('a'):
        redirect_url = extract_redirect_url_from_onclick(anchor.get('onclick'))
        if redirect_url:
            return redirect_url

    return ""


async def main():
    while True:
        # Main website URL
        base_url = "https://v2.jk.rs/"

        # Find an article URL that uses a JavaScript redirect
        article_url = find_article_url(base_url)

        if not article_url:
            print("No article URL found. Retrying after the next interval.")
            await asyncio.sleep(4 * 60 * 60)
            continue

        print(f"\nProcessing article: {article_url}")
        title, image_urls = extract_images(article_url)
        print(f"Found {len(image_urls)} images on {article_url}, title: {title}")
        if image_urls:
            for img in image_urls:
                print(f"Image URL: {img}")
            # Post to Nostr ("随机妹子图" means "random girl pics")
            await post_to_nostr("随机妹子图", title, image_urls)

        # Sleep 4 hours without blocking the event loop
        await asyncio.sleep(4 * 60 * 60)


if __name__ == "__main__":
    # Restart on unexpected errors; stop cleanly on Ctrl+C
    while True:
        try:
            asyncio.run(main())
        except KeyboardInterrupt:
            print("\nProcess interrupted by user.")
            break
        except Exception as e:
            print(f"An error occurred: {e}, restarting the program.")
    print("Exiting the program.")
```
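
The script depends on a local `nostr_poster` module that the note does not include. Below is a minimal sketch of what `post_to_nostr` could look like, assuming the pynostr library; the relay list, the `NOSTR_PRIVATE_KEY` environment variable, and the note's text layout are illustrative guesses, not part of the original code.

```
# nostr_poster.py -- a hypothetical sketch, not the author's actual module.
# Assumes the pynostr library (pip install pynostr); relay URLs and the
# NOSTR_PRIVATE_KEY environment variable are placeholders for illustration.
import asyncio
import os
import time

from pynostr.event import Event
from pynostr.key import PrivateKey
from pynostr.relay_manager import RelayManager

RELAYS = ["wss://relay.damus.io", "wss://nos.lol"]  # placeholder relay list


def _publish(content: str) -> None:
    # pynostr is synchronous, so the blocking work lives in this helper
    private_key = PrivateKey.from_nsec(os.environ["NOSTR_PRIVATE_KEY"])
    relay_manager = RelayManager(timeout=6)
    for relay in RELAYS:
        relay_manager.add_relay(relay)

    event = Event(content)          # kind-1 text note
    event.sign(private_key.hex())   # sign with the poster's key

    relay_manager.publish_event(event)
    relay_manager.run_sync()
    time.sleep(5)  # give the relays a moment to accept the event
    relay_manager.close_all_relay_connections()


async def post_to_nostr(category: str, title: str, image_urls: list[str]) -> None:
    # Build a plain-text note: category line, title line, one image URL per line
    content = f"{category}\n{title}\n" + "\n".join(image_urls)
    # Run the blocking pynostr calls off the event loop
    await asyncio.to_thread(_publish, content)
```

Most Nostr clients render bare image URLs embedded in a kind-1 note as inline images, which is presumably why publishing the plain URLs is enough here.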
Author Public Key
npub1d5ygkef6r0l7w29ek9l9c7hulsvdshms2qh74jp5qpfyad4g6h5s4ap6lz