向阳乔木 on Nostr: Lex ...
Lex Fridman的播客经常会采访一些大牛,搜到个Kaggle上的数据集,有300多期访谈CSV文件。
https://t.co/sHNLx6neMx
下载后让Claude写个Python程序生成300个txt文件,传到NotebookLM使用。
生成文件Python代码:
import csv
import os
import sys
# Raise the csv module's per-field size limit so very long transcript fields
# can be parsed. sys.maxsize does not fit in a C long on some platforms
# (e.g. 64-bit Windows), where csv.field_size_limit raises OverflowError, so
# keep halving the requested value until the call is accepted.
_field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_field_limit)
        break
    except OverflowError:
        _field_limit //= 2
def create_txt_files_from_csv(csv_file_path, output_dir='output_txt_files'):
    """Write one UTF-8 ``.txt`` file per row of a CSV file.

    Each row's ``text`` column becomes the file content. The file name is the
    row's ``title`` column reduced to letters, digits and spaces. A progress
    line is printed for every file written.

    Args:
        csv_file_path: Path to a UTF-8 CSV file whose header row contains
            ``title`` and ``text`` columns. An ``id`` column, when present,
            is used to build fallback file names.
        output_dir: Directory that receives the generated files; it is
            created if it does not exist. Defaults to ``'output_txt_files'``.

    Raises:
        KeyError: If a row has no ``title`` or ``text`` column at all.
        OSError: If the CSV cannot be opened or a text file cannot be written.
    """
    # Make sure the output directory exists.
    os.makedirs(output_dir, exist_ok=True)

    # Names written during this call, case-folded so that titles differing
    # only by case do not overwrite each other on case-insensitive file systems.
    used_names = set()

    with open(csv_file_path, 'r', newline='', encoding='utf-8') as csvfile:
        csv_reader = csv.DictReader(csvfile)

        for row_number, row in enumerate(csv_reader, start=1):
            # DictReader fills columns missing from a short row with None;
            # treat those as empty strings instead of crashing.
            title = row['title'] or ''
            text = row['text'] or ''

            # Build a file-system-safe name by dropping every character that
            # is not a letter, a digit or a space.
            safe_title = "".join(
                c for c in title if c.isalpha() or c.isdigit() or c == ' '
            ).rstrip()

            # Nothing usable left in the title: fall back to the row id, or
            # to the 1-based row number when there is no id value.
            if not safe_title:
                safe_title = f"file_{row.get('id') or row_number}"

            # Different titles can sanitize to the same name; add a numeric
            # suffix so a later row never overwrites an earlier one.
            base_title = safe_title
            suffix = 2
            while safe_title.casefold() in used_names:
                safe_title = f"{base_title}_{suffix}"
                suffix += 1
            used_names.add(safe_title.casefold())

            file_path = os.path.join(output_dir, f"{safe_title}.txt")

            with open(file_path, 'w', encoding='utf-8') as txtfile:
                txtfile.write(text)

            print(f"Created file: {file_path}")
# Run the conversion on the downloaded dataset.
csv_file_path = "podcastdata_dataset.csv"  # change this to the path of your CSV file
create_txt_files_from_csv(csv_file_path=csv_file_path)
Published at 2024-08-29 01:01:34
Event JSON
{
"id": "b36e44c8bc67ad883b42bd31b2ac27ad2330d170546ad8e45e5b5b12f043435a",
"pubkey": "dc78ed6115492137f00a39f69408c7120d162f16436522b53ab6ebb28c164a6e",
"created_at": 1724893294,
"kind": 1,
"tags": [],
"content": "Lex Fridman的播客经常会采访一些大牛,搜到个Kaggle上的数据集,有300多期访谈CSV文件。\nhttps://t.co/sHNLx6neMx\n\n下载后让Claude写个Python程序生成300个txt文件,传到notebookLM使用。\n\n生成文件Python代码:\n\nimport csv\nimport os\nimport sys\n\n# 增加CSV字段大小限制\ncsv.field_size_limit(sys.maxsize)\n\ndef create_txt_files_from_csv(csv_file_path):\n # 确保输出目录存在\n output_dir = 'output_txt_files'\n os.makedirs(output_dir, exist_ok=True)\n \n # 读取CSV文件\n with open(csv_file_path, 'r', newline='', encoding='utf-8') as csvfile:\n csv_reader = csv.DictReader(csvfile)\n \n # 遍历CSV的每一行\n for row in csv_reader:\n # 获取title和text\n title = row['title']\n text = row['text']\n \n # 创建安全的文件名(移除不允许的字符)\n safe_title = \"\".join([c for c in title if c.isalpha() or c.isdigit() or c==' ']).rstrip()\n \n # 如果文件名为空,使用id作为文件名\n if not safe_title:\n safe_title = f\"file_{row['id']}\"\n \n # 创建文件路径\n file_path = os.path.join(output_dir, f\"{safe_title}.txt\")\n \n # 写入文本文件\n with open(file_path, 'w', encoding='utf-8') as txtfile:\n txtfile.write(text)\n \n print(f\"Created file: {file_path}\")\n\n# 使用函数\ncsv_file_path = 'podcastdata_dataset.csv' # 替换为你的CSV文件路径\ncreate_txt_files_from_csv(csv_file_path) https://pbs.twimg.com/media/GWG4KwPbsAA-Df4.jpg https://pbs.twimg.com/media/GWG4YyvasAAUD8N.jpg https://pbs.twimg.com/media/GWG4eS2bMAAcqE-.jpg",
"sig": "507f09b27668c2d593e443764d4e23f5a56cd8672e3cd1ccdae9e852e370d86a3d5b0c3468091b5372c8f8681befdeafb7f1e7cfab131ef0db702da1fdbac579"
}