312 lines
11 KiB
Python
312 lines
11 KiB
Python
import pandas as pd
|
|
import os
|
|
import re
|
|
import datetime
|
|
import time
|
|
import pytz
|
|
import requests
|
|
import ssl
|
|
import urllib.parse
|
|
import OpenSSL
|
|
from dateutil import parser
|
|
|
|
def get_host_info(url):
|
|
parsed_url = urllib.parse.urlparse(url)
|
|
host = parsed_url.netloc
|
|
return host
|
|
|
|
def get_certificate_expiration_date(host):
|
|
result = ''
|
|
hostname = host
|
|
port = 443
|
|
cert = ssl.get_server_certificate((hostname, port)).encode()
|
|
if(cert):
|
|
cert_obj = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, cert)
|
|
cert_expire_time = parser.parse(cert_obj.get_notAfter().decode("UTF-8")).strftime('%Y-%m-%d %H:%M:%S')
|
|
if(cert_obj.has_expired()):
|
|
result = ''
|
|
else:
|
|
current_date = datetime.datetime.now()
|
|
remaining_days = (datetime.datetime.strptime(cert_expire_time, "%Y-%m-%d %H:%M:%S") - current_date).days
|
|
yymmdd_expiration_date = str(cert_expire_time)[0:10]
|
|
result = str(yymmdd_expiration_date)+"(剩"+str(remaining_days)+ "天到期)"
|
|
else:
|
|
result = ''
|
|
return result
|
|
|
|
|
|
|
|
def get_all_tag(website_info_data):
|
|
all_tag = []
|
|
all_tag_info_data = []
|
|
# 遍历数据,获取所有的tag
|
|
for website_info_index, website_info_row in website_info_data.iterrows():
|
|
tag_list = website_info_row["Tag"].split(";")
|
|
pure_tag_list = []
|
|
for tag in tag_list:
|
|
pure_tag = tag.strip()
|
|
if pure_tag != "":
|
|
pure_tag_list.append(pure_tag)
|
|
if pure_tag not in all_tag:
|
|
all_tag.append(pure_tag)
|
|
all_tag_info_data.append([])
|
|
print("pure_tag_list", pure_tag_list)
|
|
print(
|
|
"tag==>>",
|
|
website_info_index,
|
|
website_info_row["Tag"],
|
|
"pure_tag_list==>>",
|
|
pure_tag_list,
|
|
)
|
|
|
|
# 遍历所有数据,将数据放到all_tag_info_data 中
|
|
for website_info_index, website_info_row in website_info_data.iterrows():
|
|
tag_list = website_info_row["Tag"].split(";")
|
|
for tag in tag_list:
|
|
pure_tag = tag.strip()
|
|
if pure_tag != "":
|
|
all_tag_info_data[all_tag.index(pure_tag)].append(website_info_row)
|
|
|
|
print("all_tag", all_tag, "all_tag_info_data", all_tag_info_data)
|
|
return {"all_tag": all_tag, "all_tag_info_data": all_tag_info_data}
|
|
|
|
|
|
def short_url(url):
|
|
result = ""
|
|
if(url.startswith("http://")):
|
|
url = url[7:]
|
|
if(url.startswith("https://")):
|
|
url = url[8:]
|
|
if(url.startswith("www.")):
|
|
url = url[4:]
|
|
if(url.endswith("/")):
|
|
url = url[:-1]
|
|
|
|
if len(url) > 30:
|
|
result = url[0:30] + "..."
|
|
else:
|
|
result = url
|
|
return result
|
|
|
|
|
|
def replaceTemplate(template, reInfo, data):
|
|
|
|
reResult = re.findall(reInfo, template)
|
|
new_read_me = template.replace(reResult[0], data)
|
|
return new_read_me
|
|
|
|
|
|
def create_tag_table_html(tag_name, tag_info_data):
|
|
print("==create_tag_table_html", tag_name)
|
|
website_info_html = "<a href='#目录'>🔙目录</a>" + "\n" + "<table>"
|
|
website_info_html = (
|
|
website_info_html
|
|
+ "<tr>"
|
|
+ "<td width='400'>"
|
|
+ "<span>(づ。◕‿‿◕。)づ</span><br/><span>Name</span>"
|
|
+ "</td>"
|
|
+ "<td>"
|
|
+ "<span> (●゚ω゚●)</span><br/><span>Description</span>"
|
|
+ "</td>"
|
|
+ "</tr>"
|
|
)
|
|
for info_data in tag_info_data:
|
|
print(
|
|
"==>>",
|
|
{
|
|
"Name": info_data["Name"],
|
|
"Url": info_data["Url"],
|
|
"Description": info_data["Description"],
|
|
},
|
|
)
|
|
website_info_html = (
|
|
website_info_html
|
|
+ "<tr>"
|
|
+ "<td>"
|
|
+ info_data["Name"]
|
|
+ "</td>"
|
|
+ "<td>"
|
|
+ info_data["Description"]
|
|
+ "</td>"
|
|
+ "</tr>"
|
|
)
|
|
|
|
website_info_html = website_info_html + "</table>" + "\n" + "<a href='#目录'>🔙目录</a>" + "\n"
|
|
return website_info_html
|
|
|
|
|
|
def main():
|
|
print("当前服务器时间", datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
|
|
# 读取csv文件
|
|
website_info_data = pd.read_csv("./website_info.csv")
|
|
# 反转数据,保证最新的数据在最前面
|
|
website_info_data = website_info_data.reindex(index=website_info_data.index[::-1])
|
|
print(website_info_data)
|
|
# 遍历数据
|
|
for website_info_index, website_info_row in website_info_data.iterrows():
|
|
print("=start=>>", website_info_index, website_info_row["Url"])
|
|
# 检测网站可用性,记录请求时间,完成数据拼接
|
|
expiration_date=''
|
|
try:
|
|
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
|
|
# 检测网站是否正常
|
|
website_info_row_url_result = requests.get(
|
|
website_info_row["Url"], timeout=5, headers=headers
|
|
)
|
|
|
|
try:
|
|
tmp_host = get_host_info(website_info_row["Url"])
|
|
expiration_date_result = get_certificate_expiration_date(tmp_host)
|
|
if(len(expiration_date_result) > 0):
|
|
expiration_date = expiration_date_result
|
|
except Exception as e:
|
|
expiration_date = ''
|
|
print("!!expiration_date", expiration_date)
|
|
total_ms = str(
|
|
int(website_info_row_url_result.elapsed.total_seconds() * 1000)
|
|
)
|
|
# 响应码为2开头,标注绿色,否则标注红色
|
|
if website_info_row_url_result.status_code:
|
|
website_info_row["Name"] = (
|
|
"<span style='font-weight: 600'>"
|
|
+ website_info_row["Name"]
|
|
+ "</span>"
|
|
+ "<span>"
|
|
+ (
|
|
" 🟢 " + total_ms + "ms"
|
|
if (str(website_info_row_url_result.status_code).startswith("1") or str(website_info_row_url_result.status_code).startswith("2") or str(website_info_row_url_result.status_code).startswith("3") or str(website_info_row_url_result.status_code).startswith("4"))
|
|
else " 🔴"
|
|
)
|
|
+ "</span><br/>"
|
|
|
|
)
|
|
# 无法响应,标注红色
|
|
except Exception as e:
|
|
print("error==", e)
|
|
website_info_row["Name"] = (
|
|
"<span style='font-weight: 600'>"
|
|
+ website_info_row["Name"]
|
|
+ " 🔴"
|
|
+ "</span><br/>"
|
|
)
|
|
finally:
|
|
website_info_row["Name"] = (
|
|
"<span>"
|
|
+ website_info_row["Name"]
|
|
+ "</span>"
|
|
+ "<a href='"
|
|
+ website_info_row["Url"]
|
|
+ "'>"
|
|
+ (short_url(website_info_row["Url"]))
|
|
+ "</a>"
|
|
+ (
|
|
"<br/><span>SSL证书到期时间:" + expiration_date + "</span>"
|
|
if (expiration_date and ("🟢" in website_info_row["Name"])) else ""
|
|
)
|
|
)
|
|
print("finish", website_info_row["Url"], website_info_row["Name"])
|
|
# 完成table数据拼接
|
|
website_info_html = "<table>"
|
|
website_info_html = (
|
|
website_info_html
|
|
+ "<tr>"
|
|
+ "<td width='400'>"
|
|
+ "<span>(づ。◕‿‿◕。)づ</span><br/><span>Name</span>"
|
|
+ "</td>"
|
|
+ "<td>"
|
|
+ "<span> (●゚ω゚●)</span><br/><span>Description</span>"
|
|
+ "</td>"
|
|
+ "<td width='300'>"
|
|
+ "<span> ︿( ̄︶ ̄)︿</span><br/><span>Tag</span>"
|
|
+ "</td>"
|
|
+ "</tr>"
|
|
)
|
|
for website_info_index, website_info_row in website_info_data.iterrows():
|
|
website_info_html = (
|
|
website_info_html
|
|
+ "<tr>"
|
|
+ "<td>"
|
|
+ website_info_row["Name"]
|
|
+ "</td>"
|
|
+ "<td>"
|
|
+ website_info_row["Description"]
|
|
+ "</td>"
|
|
+ "<td>"
|
|
+ website_info_row["Tag"]
|
|
+ "</td>"
|
|
+ "</tr>"
|
|
)
|
|
website_info_html = website_info_html + "</table>"
|
|
# 根据EditREADME.md模板,替换占位符, 生成最终数据
|
|
readme_md = ""
|
|
with open(os.path.join(os.getcwd(), "EditREADME.md"), "r") as load_f:
|
|
readme_md = load_f.read()
|
|
mail_re = r"--insStart----insEnd--"
|
|
in_datetime = datetime.datetime.fromtimestamp(
|
|
int(time.time()), pytz.timezone("Asia/Shanghai")
|
|
).strftime("%Y-%m-%d %H:%M:%S")
|
|
all_info_content = (
|
|
"\n\n"
|
|
+ "## 开源灵感库已收录"
|
|
+ str(len(website_info_data))
|
|
+ "束灵感INS!"
|
|
+ "(~ ̄▽ ̄)~更新时间("
|
|
+ in_datetime
|
|
+ ")\n\n"
|
|
+ website_info_html
|
|
+ "\n\n"
|
|
)
|
|
new_read_me = replaceTemplate(readme_md, mail_re, all_info_content)
|
|
print("new_read_me", new_read_me)
|
|
|
|
# 生成类别数据
|
|
tag_re = r"--tagStart----tagEnd--"
|
|
all_tag_result = get_all_tag(website_info_data)
|
|
all_tag = all_tag_result["all_tag"]
|
|
all_tag_info_data = all_tag_result["all_tag_info_data"]
|
|
print("==all_tag_info_data==", all_tag_info_data)
|
|
print("==all_tag==", all_tag)
|
|
all_tag_content = ""
|
|
|
|
for tag_content in all_tag:
|
|
tag_html = create_tag_table_html(
|
|
tag_content, all_tag_info_data[all_tag.index(tag_content)]
|
|
)
|
|
tag_whole_content = "## " + tag_content + "\n\n" + tag_html + "\n\n"
|
|
all_tag_content = all_tag_content + tag_whole_content
|
|
|
|
new_read_me = replaceTemplate(new_read_me, tag_re, all_tag_content)
|
|
|
|
# 添加索引锚点
|
|
tag_index_info = ""
|
|
for tag_index, tag_content in enumerate(all_tag):
|
|
if tag_index != (len(all_tag) - 1):
|
|
tag_index_info = (
|
|
tag_index_info
|
|
+ "<a href='#"
|
|
+ tag_content
|
|
+ "'>"
|
|
+ tag_content + "(" + str(len(all_tag_info_data[all_tag.index(tag_content)])) + ")"
|
|
+ "</a>"
|
|
+ ", "
|
|
)
|
|
else:
|
|
tag_index_info = (
|
|
tag_index_info
|
|
+ "<a href='#"
|
|
+ tag_content
|
|
+ "'>"
|
|
+ tag_content + "(" + str(len(all_tag_info_data[all_tag.index(tag_content)])) + ")"
|
|
+ "</a>"
|
|
)
|
|
|
|
tag_index_re = r"--tagIndexInfoStart----tagIndexInfoEnd--"
|
|
|
|
new_read_me = replaceTemplate(new_read_me, tag_index_re, tag_index_info)
|
|
|
|
# 将生成的数据写入README.md
|
|
with open(os.path.join(os.getcwd(), "README.md"), "w") as load_f:
|
|
load_f.write(new_read_me)
|
|
|
|
main()
|