Update app.py
app.py CHANGED
@@ -1,286 +1,75 @@
import streamlit as st
import pandas as pd
-import numpy as np
-import requests
-import time
-from collections import defaultdict
-
-st.set_page_config(layout="wide", page_title="影城效率与内容分析工具")
-
-
-#
-def clean_movie_title(title):
-    if not isinstance(title, str):
-        return title
-    return title.split(' ', 1)[0]
-
-
-def style_efficiency(row):
-    green = 'background-color: #E6F5E6;'  # Light Green
-    red = 'background-color: #FFE5E5;'  # Light Red
-    default = ''
-    styles = [default] * len(row)
-    seat_efficiency = row.get('座次效率', 0)
-    session_efficiency = row.get('场次效率', 0)
-    if seat_efficiency > 1.5 or session_efficiency > 1.5:
-        styles = [green] * len(row)
-    elif seat_efficiency < 0.5 or session_efficiency < 0.5:
-        styles = [red] * len(row)
-    return styles
-
-
-def process_and_analyze_data(df):
-    if df.empty:
-        return pd.DataFrame()
-    analysis_df = df.groupby('影片名称_清理后').agg(
-        座位数=('座位数', 'sum'),
-        场次=('影片名称_清理后', 'size'),
-        票房=('总收入', 'sum'),
-        人次=('总人次', 'sum')
-    ).reset_index()
-    analysis_df.rename(columns={'影片名称_清理后': '影片'}, inplace=True)
-    analysis_df = analysis_df.sort_values(by='票房', ascending=False).reset_index(drop=True)
-    total_seats = analysis_df['座位数'].sum()
-    total_sessions = analysis_df['场次'].sum()
-    total_revenue = analysis_df['票房'].sum()
-    analysis_df['均价'] = np.divide(analysis_df['票房'], analysis_df['人次']).fillna(0)
-    analysis_df['座次比'] = np.divide(analysis_df['座位数'], total_seats).fillna(0)
-    analysis_df['场次比'] = np.divide(analysis_df['场次'], total_sessions).fillna(0)
-    analysis_df['票房比'] = np.divide(analysis_df['票房'], total_revenue).fillna(0)
-    analysis_df['座次效率'] = np.divide(analysis_df['票房比'], analysis_df['座次比']).fillna(0)
-    analysis_df['场次效率'] = np.divide(analysis_df['票房比'], analysis_df['场次比']).fillna(0)
-    final_columns = ['影片', '座位数', '场次', '票房', '人次', '均价', '座次比', '场次比', '票房比', '座次效率',
-                     '场次效率']
-    analysis_df = analysis_df[final_columns]
-    return analysis_df
-
-
-# --- New Feature: Server Movie Content Inquiry ---
-@st.cache_data(show_spinner=False)
-def fetch_and_process_server_movies(priority_movie_titles=None):
-    if priority_movie_titles is None:
-        priority_movie_titles = []
-
-    # 1. Get Token
-    token_headers = {
-        'Host': 'oa.hengdianfilm.com:7080', 'Content-Type': 'application/json',
-        'Origin': 'http://115.239.253.233:7080', 'Connection': 'keep-alive',
-        'Accept': 'application/json, text/javascript, */*; q=0.01',
-        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 18_5_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/138.0.7204.156 Mobile/15E148 Safari/604.1',
-        'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
-    }
-    token_json_data = {'appId': 'hd', 'appSecret': 'ad761f8578cc6170', 'timeStamp': int(time.time() * 1000)}
-    token_url = 'http://oa.hengdianfilm.com:7080/cinema-api/admin/generateToken?token=hd&murl=?token=hd&murl=ticket=-1495916529737643774'
-    response = requests.post(token_url, headers=token_headers, json=token_json_data, timeout=10)
-    response.raise_for_status()
-    token_data = response.json()
-    if token_data.get('error_code') != '0000':
-        raise Exception(f"获取Token失败: {token_data.get('error_desc')}")
-    auth_token = token_data['param']
-
-    # 2. Fetch movie list (with pagination and delay)
-    all_movies = []
-    page_index = 1
-    while True:
-        list_headers = {
-            'Accept': 'application/json, text/javascript, */*; q=0.01',
-            'Content-Type': 'application/json; charset=UTF-8',
-            'Origin': 'http://115.239.253.233:7080', 'Proxy-Connection': 'keep-alive', 'Token': auth_token,
-            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
-            'X-SESSIONID': 'PQ0J3K85GJEDVYIGZE1KEG1K80USDAP4',
-        }
-        list_params = {'token': 'hd', 'murl': 'ContentMovie'}
-        list_json_data = {'THEATER_ID': 38205954, 'SOURCE': 'SERVER', 'ASSERT_TYPE': 2, 'PAGE_CAPACITY': 20,
-                          'PAGE_INDEX': page_index}
-        list_url = 'http://oa.hengdianfilm.com:7080/cinema-api/cinema/server/dcp/list'
-        response = requests.post(list_url, params=list_params, headers=list_headers, json=list_json_data, verify=False)
-        response.raise_for_status()
-        movie_data = response.json()
-        if movie_data.get("RSPCD") != "000000":
-            raise Exception(f"获取影片列表失败: {movie_data.get('RSPMSG')}")
-        body = movie_data.get("BODY", {})
-        movies_on_page = body.get("LIST", [])
-        if not movies_on_page: break
-        all_movies.extend(movies_on_page)
-        if len(all_movies) >= body.get("COUNT", 0): break
-        page_index += 1
-        time.sleep(1)  # Add 1-second delay between requests
-
-    # 3. Process data into a central, detailed structure
-    movie_details = {}
-    for movie in all_movies:
-        content_name = movie.get('CONTENT_NAME')
-        if not content_name: continue
-        movie_details[content_name] = {
-            'assert_name': movie.get('ASSERT_NAME'),
-            'halls': sorted([h.get('HALL_NAME') for h in movie.get('HALL_INFO', [])]),
-            'play_time': movie.get('PLAY_TIME')
-        }
-
-    # 4. Prepare data for the two display views
-    by_hall = defaultdict(list)
-    for content_name, details in movie_details.items():
-        for hall_name in details['halls']:
-            by_hall[hall_name].append({'content_name': content_name, 'details': details})
-
-    for hall_name in by_hall:
-        by_hall[hall_name].sort(key=lambda item: (
-            item['details']['assert_name'] is None or item['details']['assert_name'] == '',
-            item['details']['assert_name'] or item['content_name']
-        ))
-
-    view2_list = []
-    for content_name, details in movie_details.items():
-        if details.get('assert_name'):
-            view2_list.append({
-                'assert_name': details['assert_name'],
-                'content_name': content_name,
-                'halls': details['halls'],
-                'play_time': details['play_time']
-            })
-
-    priority_list = [item for item in view2_list if
-                     any(p_title in item['assert_name'] for p_title in priority_movie_titles)]
-    other_list_items = [item for item in view2_list if item not in priority_list]
-
-    priority_list.sort(key=lambda x: x['assert_name'])
-    other_list_items.sort(key=lambda x: x['assert_name'])
-
-    final_sorted_list = priority_list + other_list_items
-
-    return dict(sorted(by_hall.items())), final_sorted_list
-
-
-def get_circled_number(hall_name):
-    mapping = {'1': '①', '2': '②', '3': '③', '4': '④', '5': '⑤', '6': '⑥', '7': '⑦', '8': '⑧', '9': '⑨'}
-    num_str = ''.join(filter(str.isdigit, hall_name))
-    return mapping.get(num_str, '')
-
-
-def format_play_time(time_str):
-    if not time_str or not isinstance(time_str, str): return None
-    try:
-        ...
-    except (ValueError, IndexError):
-        return None
-
-
-# ---
-def add_tms_locations_to_analysis(analysis_df, tms_movie_list):
-    locations = []
-    for index, row in analysis_df.iterrows():
-        movie_title = row['影片']
-        found_versions = []
-        for tms_movie in tms_movie_list:
-            # FIX 3: Change matching from 'in' to 'startswith'
-            if tms_movie['assert_name'].startswith(movie_title):
-                version_name = tms_movie['assert_name'].replace(movie_title, '').strip()
-                circled_halls = " ".join(sorted([get_circled_number(h) for h in tms_movie['halls']]))
-
-                # FIX 2: Handle empty version name to remove colon
-                if version_name:
-                    found_versions.append(f"{version_name}:{circled_halls}")
-                else:
-                    found_versions.append(circled_halls)
-
-        locations.append('|'.join(found_versions))
-    analysis_df['影片所在影厅位置'] = locations
-    return analysis_df
-
-
-# --- Streamlit Main UI ---
-st.title('影城排片效率与内容分析工具')
-st.write("上传 `影片映出日累计报表.xlsx` 进行效率分析,或点击下方按钮查询 TMS 服务器影片内容。")
-
-    try:
-        df = pd.read_excel(uploaded_file, skiprows=3, header=None)
-        df.rename(columns={0: '影片名称', 2: '放映时间', 5: '总人次', 6: '总收入', 7: '座位数'}, inplace=True)
-        required_cols = ['影片名称', '放映时间', '座位数', '总收入', '总人次']
-        df = df[required_cols]
-        df.dropna(subset=['影片名称', '放映时间'], inplace=True)
-        for col in ['座位数', '总收入', '总人次']:
-            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
-        df['放映时间'] = pd.to_datetime(df['放映时间'], format='%H:%M:%S', errors='coerce').dt.time
-        df.dropna(subset=['放映时间'], inplace=True)
-        df['影片名称_清理后'] = df['影片名称'].apply(clean_movie_title)
-        st.toast("文件上传成功,效率分析已生成!", icon="🎉")
-        format_config = {'座位数': '{:,.0f}', '场次': '{:,.0f}', '人次': '{:,.0f}', '票房': '{:,.2f}', '均价': '{:.2f}',
-                         '座次比': '{:.2%}', '场次比': '{:.2%}', '票房比': '{:.2%}', '座次效率': '{:.2f}',
-                         '场次效率': '{:.2f}'}
-
-        if query_tms_for_location:
-            with st.spinner("正在关联查询 TMS 服务器..."):
-                _, tms_movie_list = fetch_and_process_server_movies()
-                full_day_analysis = add_tms_locations_to_analysis(full_day_analysis, tms_movie_list)
-                prime_time_analysis = add_tms_locations_to_analysis(prime_time_analysis, tms_movie_list)
-
-                # FIX 1: Reorder columns
-                if '影片所在影厅位置' in full_day_analysis.columns:
-                    cols_full = full_day_analysis.columns.tolist()
-                    cols_full.insert(1, cols_full.pop(cols_full.index('影片所在影厅位置')))
-                    full_day_analysis = full_day_analysis[cols_full]
-
-                if '影片所在影厅位置' in prime_time_analysis.columns:
-                    cols_prime = prime_time_analysis.columns.tolist()
-                    cols_prime.insert(1, cols_prime.pop(cols_prime.index('影片所在影厅位置')))
-                    prime_time_analysis = prime_time_analysis[cols_prime]
-
-        st.dataframe(
-            prime_time_analysis.style.format(format_config),
-            use_container_width=True, hide_index=True)
-
-        movie_titles = full_day_analysis['影片'].tolist()
-        formatted_titles = ''.join([f'《{title}》' for title in movie_titles])
-        st.code(formatted_titles, language='text')
-
-    except Exception as e:
-        st.error(f"处理文件时出错: {e}")
-
-st.
-st.
-if st.button('点击查询 TMS 服务器'):
-    with st.spinner("正在从 TMS 服务器获取数据中..."):
-        try:
-            halls_data, movie_list_sorted = fetch_and_process_server_movies()
-            st.toast("TMS 服务器数据获取成功!", icon="🎉")
-
-            df_view2 = pd.DataFrame(view2_data)
-            st.dataframe(df_view2, hide_index=True, use_container_width=True)
-
-            st.markdown("#### 按影厅查看影片内容")
-            hall_tabs = st.tabs(halls_data.keys())
-            for tab, hall_name in zip(hall_tabs, halls_data.keys()):
-                with tab:
-                    view1_data_for_tab = [
-                        {'影片名称': item['details']['assert_name'],
-                         '所在影厅': " ".join(sorted([get_circled_number(h) for h in item['details']['halls']])),
-                         '文件名': item['content_name'],
-                         '时长': format_play_time(item['details']['play_time'])}
-                        for item in halls_data[hall_name]]
-                    df_view1_tab = pd.DataFrame(view1_data_for_tab)
-                    st.dataframe(df_view1_tab, hide_index=True, use_container_width=True)
-
-        except Exception as e:
-            st.error(f"查询服务器时出错: {e}")
-
+import re
+
+st.set_page_config(layout="wide")
+
+st.title('影片放映时间表分析')
+
+# 1. File upload widgets
+uploaded_file = st.file_uploader("上传“影片放映时间表.xlsx”文件", type=['xlsx'])
+ad_duration = st.number_input('输入每个广告的时长(分钟)', min_value=0, value=9)
+
+if uploaded_file is not None:
+    try:
+        # Read the Excel file; the header row is the 4th row (header=3)
+        df = pd.read_excel(uploaded_file, header=3)
+
+        # --- Bug fix ---
+        # Explicitly cast the '影片' column to string to avoid mixed-type errors
+        df['影片'] = df['影片'].astype(str)
+
+        st.subheader('上传的原始数据')
+        st.dataframe(df)
+
+        # 2. Data processing and cleaning
+        # Clean the '影厅' (hall) column
+        def clean_hall_name(name):
+            if isinstance(name, str):
+                match = re.search(r'【(\d+)号', name)
+                if match:
+                    return f"{match.group(1)}号厅"
+            return name
+
+        df['影厅'] = df['影厅'].apply(clean_hall_name)
+
+        # Convert '放映日期' to datetime objects
+        df['放映日期'] = pd.to_datetime(df['放映日期'])
+        df['日期'] = df['放映日期'].dt.strftime('%m月%d日')
+
+        # Drop rows with missing '影厅' or '片长' values
+        df.dropna(subset=['影厅', '片长'], inplace=True)
+
+        # 3. Count films and total running time per hall per day
+        summary = df.groupby(['日期', '影厅']).agg(
+            影片数量=('影片', 'count'),
+            影片播放时长=('片长', 'sum')
+        ).reset_index()
+
+        # Calculate advertising time: number of screenings times the ad duration
+        summary['广告时长'] = summary['影片数量'] * ad_duration
+
+        # 4. Build a pivot table for the final display
+        pivot_table = summary.pivot_table(
+            index='日期',
+            columns='影厅',
+            values=['广告时长', '影片播放时长']
+        )
+
+        # Fill all blank (NaN) values with 0
+        pivot_table = pivot_table.fillna(0)
+
+        # Convert values to integers for a tidier table
+        pivot_table = pivot_table.astype(int)
+
+        # Swap the column levels and sort to get the desired output format
+        if not pivot_table.empty:
+            pivot_table = pivot_table.swaplevel(0, 1, axis=1).sort_index(axis=1)
+
+        st.subheader('影厅播放统计')
+        st.dataframe(pivot_table)
+
+    except Exception as e:
+        st.error(f"处理文件时出错: {e}")
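
For reference, a minimal, self-contained sketch of what the new script computes, run on two days of invented sample rows outside Streamlit. The column names (影片, 影厅, 片长, 放映日期) and the cleaning/aggregation steps mirror the diff above; the sample values and the 9-minute ad duration are illustrative assumptions, not data from a real 影片放映时间表.xlsx.

import re
import pandas as pd

# Invented sample rows mimicking the expected spreadsheet layout (assumption).
df = pd.DataFrame({
    '影片': ['影片A', '影片B', '影片A'],
    '影厅': ['【1号激光厅】', '【2号厅】', '【1号激光厅】'],
    '片长': [120, 95, 120],  # minutes
    '放映日期': ['2024-06-01', '2024-06-01', '2024-06-02'],
})
ad_duration = 9  # minutes of ads per screening, same default as the app

# Same hall-name cleaning as clean_hall_name in the diff: keep only "<N>号厅".
def clean_hall_name(name):
    if isinstance(name, str):
        match = re.search(r'【(\d+)号', name)
        if match:
            return f"{match.group(1)}号厅"
    return name

df['影厅'] = df['影厅'].apply(clean_hall_name)
df['日期'] = pd.to_datetime(df['放映日期']).dt.strftime('%m月%d日')

# Per day and hall: number of screenings, total film minutes, then ad minutes.
summary = df.groupby(['日期', '影厅']).agg(
    影片数量=('影片', 'count'),
    影片播放时长=('片长', 'sum'),
).reset_index()
summary['广告时长'] = summary['影片数量'] * ad_duration

# Pivot to one row per day and one (hall, metric) column pair per hall.
pivot = (summary.pivot_table(index='日期', columns='影厅', values=['广告时长', '影片播放时长'])
         .fillna(0).astype(int)
         .swaplevel(0, 1, axis=1).sort_index(axis=1))
print(pivot)
# Expected: 06月01日 -> (1号厅: 广告时长 9, 影片播放时长 120), (2号厅: 9, 95);
#           06月02日 -> (1号厅: 9, 120), (2号厅: 0, 0 after fillna).

The real app wraps these same steps in the Streamlit upload widget and the try/except error handling shown in the diff.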