Ethscriptions committed on
Commit a1a37cc · verified · 1 parent: 559ede6

Update app.py

Files changed (1)
  1. app.py +52 -263
app.py CHANGED
@@ -1,286 +1,75 @@
  import streamlit as st
  import pandas as pd
- import numpy as np
- import requests
- import time
- from collections import defaultdict
-
- # Set page layout to wide mode and set page title
- st.set_page_config(layout="wide", page_title="影城效率与内容分析工具")
-
-
- # --- Efficiency Analysis Functions ---
- def clean_movie_title(title):
-     if not isinstance(title, str):
-         return title
-     return title.split(' ', 1)[0]
-
-
- def style_efficiency(row):
-     green = 'background-color: #E6F5E6;' # Light Green
-     red = 'background-color: #FFE5E5;' # Light Red
-     default = ''
-     styles = [default] * len(row)
-     seat_efficiency = row.get('座次效率', 0)
-     session_efficiency = row.get('场次效率', 0)
-     if seat_efficiency > 1.5 or session_efficiency > 1.5:
-         styles = [green] * len(row)
-     elif seat_efficiency < 0.5 or session_efficiency < 0.5:
-         styles = [red] * len(row)
-     return styles
-
-
- def process_and_analyze_data(df):
-     if df.empty:
-         return pd.DataFrame()
-     analysis_df = df.groupby('影片名称_清理后').agg(
-         座位数=('座位数', 'sum'),
-         场次=('影片名称_清理后', 'size'),
-         票房=('总收入', 'sum'),
-         人次=('总人次', 'sum')
-     ).reset_index()
-     analysis_df.rename(columns={'影片名称_清理后': '影片'}, inplace=True)
-     analysis_df = analysis_df.sort_values(by='票房', ascending=False).reset_index(drop=True)
-     total_seats = analysis_df['座位数'].sum()
-     total_sessions = analysis_df['场次'].sum()
-     total_revenue = analysis_df['票房'].sum()
-     analysis_df['均价'] = np.divide(analysis_df['票房'], analysis_df['人次']).fillna(0)
-     analysis_df['座次比'] = np.divide(analysis_df['座位数'], total_seats).fillna(0)
-     analysis_df['场次比'] = np.divide(analysis_df['场次'], total_sessions).fillna(0)
-     analysis_df['票房比'] = np.divide(analysis_df['票房'], total_revenue).fillna(0)
-     analysis_df['座次效率'] = np.divide(analysis_df['票房比'], analysis_df['座次比']).fillna(0)
-     analysis_df['场次效率'] = np.divide(analysis_df['票房比'], analysis_df['场次比']).fillna(0)
-     final_columns = ['影片', '座位数', '场次', '票房', '人次', '均价', '座次比', '场次比', '票房比', '座次效率',
-                      '场次效率']
-     analysis_df = analysis_df[final_columns]
-     return analysis_df
-
-
- # --- New Feature: Server Movie Content Inquiry ---
- @st.cache_data(show_spinner=False)
- def fetch_and_process_server_movies(priority_movie_titles=None):
-     if priority_movie_titles is None:
-         priority_movie_titles = []
-
-     # 1. Get Token
-     token_headers = {
-         'Host': 'oa.hengdianfilm.com:7080', 'Content-Type': 'application/json',
-         'Origin': 'http://115.239.253.233:7080', 'Connection': 'keep-alive',
-         'Accept': 'application/json, text/javascript, */*; q=0.01',
-         'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 18_5_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/138.0.7204.156 Mobile/15E148 Safari/604.1',
-         'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
-     }
-     token_json_data = {'appId': 'hd', 'appSecret': 'ad761f8578cc6170', 'timeStamp': int(time.time() * 1000)}
-     token_url = 'http://oa.hengdianfilm.com:7080/cinema-api/admin/generateToken?token=hd&murl=?token=hd&murl=ticket=-1495916529737643774'
-     response = requests.post(token_url, headers=token_headers, json=token_json_data, timeout=10)
-     response.raise_for_status()
-     token_data = response.json()
-     if token_data.get('error_code') != '0000':
-         raise Exception(f"获取Token失败: {token_data.get('error_desc')}")
-     auth_token = token_data['param']
-
-     # 2. Fetch movie list (with pagination and delay)
-     all_movies = []
-     page_index = 1
-     while True:
-         list_headers = {
-             'Accept': 'application/json, text/javascript, */*; q=0.01',
-             'Content-Type': 'application/json; charset=UTF-8',
-             'Origin': 'http://115.239.253.233:7080', 'Proxy-Connection': 'keep-alive', 'Token': auth_token,
-             'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36',
-             'X-SESSIONID': 'PQ0J3K85GJEDVYIGZE1KEG1K80USDAP4',
-         }
-         list_params = {'token': 'hd', 'murl': 'ContentMovie'}
-         list_json_data = {'THEATER_ID': 38205954, 'SOURCE': 'SERVER', 'ASSERT_TYPE': 2, 'PAGE_CAPACITY': 20,
-                           'PAGE_INDEX': page_index}
-         list_url = 'http://oa.hengdianfilm.com:7080/cinema-api/cinema/server/dcp/list'
-         response = requests.post(list_url, params=list_params, headers=list_headers, json=list_json_data, verify=False)
-         response.raise_for_status()
-         movie_data = response.json()
-         if movie_data.get("RSPCD") != "000000":
-             raise Exception(f"获取影片列表失败: {movie_data.get('RSPMSG')}")
-         body = movie_data.get("BODY", {})
-         movies_on_page = body.get("LIST", [])
-         if not movies_on_page: break
-         all_movies.extend(movies_on_page)
-         if len(all_movies) >= body.get("COUNT", 0): break
-         page_index += 1
-         time.sleep(1) # Add 1-second delay between requests
-
-     # 3. Process data into a central, detailed structure
-     movie_details = {}
-     for movie in all_movies:
-         content_name = movie.get('CONTENT_NAME')
-         if not content_name: continue
-         movie_details[content_name] = {
-             'assert_name': movie.get('ASSERT_NAME'),
-             'halls': sorted([h.get('HALL_NAME') for h in movie.get('HALL_INFO', [])]),
-             'play_time': movie.get('PLAY_TIME')
-         }
-
-     # 4. Prepare data for the two display views
-     by_hall = defaultdict(list)
-     for content_name, details in movie_details.items():
-         for hall_name in details['halls']:
-             by_hall[hall_name].append({'content_name': content_name, 'details': details})
-
-     for hall_name in by_hall:
-         by_hall[hall_name].sort(key=lambda item: (
-             item['details']['assert_name'] is None or item['details']['assert_name'] == '',
-             item['details']['assert_name'] or item['content_name']
-         ))
-
-     view2_list = []
-     for content_name, details in movie_details.items():
-         if details.get('assert_name'):
-             view2_list.append({
-                 'assert_name': details['assert_name'],
-                 'content_name': content_name,
-                 'halls': details['halls'],
-                 'play_time': details['play_time']
-             })
-
-     priority_list = [item for item in view2_list if
-                      any(p_title in item['assert_name'] for p_title in priority_movie_titles)]
-     other_list_items = [item for item in view2_list if item not in priority_list]
-
-     priority_list.sort(key=lambda x: x['assert_name'])
-     other_list_items.sort(key=lambda x: x['assert_name'])
-
-     final_sorted_list = priority_list + other_list_items
-
-     return dict(sorted(by_hall.items())), final_sorted_list
-
-
- def get_circled_number(hall_name):
-     mapping = {'1': '①', '2': '②', '3': '③', '4': '④', '5': '⑤', '6': '⑥', '7': '⑦', '8': '⑧', '9': '⑨'}
-     num_str = ''.join(filter(str.isdigit, hall_name))
-     return mapping.get(num_str, '')
-
-
- def format_play_time(time_str):
-     if not time_str or not isinstance(time_str, str): return None
      try:
-         parts = time_str.split(':'); hours = int(parts[0]); minutes = int(parts[1])
-         return hours * 60 + minutes
-     except (ValueError, IndexError):
-         return None
-
-
- # --- UPDATED Helper function to add TMS location column ---
- def add_tms_locations_to_analysis(analysis_df, tms_movie_list):
-     locations = []
-     for index, row in analysis_df.iterrows():
-         movie_title = row['影片']
-         found_versions = []
-         for tms_movie in tms_movie_list:
-             # FIX 3: Change matching from 'in' to 'startswith'
-             if tms_movie['assert_name'].startswith(movie_title):
-                 version_name = tms_movie['assert_name'].replace(movie_title, '').strip()
-                 circled_halls = " ".join(sorted([get_circled_number(h) for h in tms_movie['halls']]))
-
-                 # FIX 2: Handle empty version name to remove colon
-                 if version_name:
-                     found_versions.append(f"{version_name}:{circled_halls}")
-                 else:
-                     found_versions.append(circled_halls)
-
-         locations.append('|'.join(found_versions))
-
-     analysis_df['影片所在影厅位置'] = locations
-     return analysis_df
-
-
- # --- Streamlit Main UI ---
- st.title('影城排片效率与内容分析工具')
- st.write("上传 `影片映出日累计报表.xlsx` 进行效率分析,或点击下方按钮查询 TMS 服务器影片内容。")
-
- uploaded_file = st.file_uploader("请在此处上传 Excel 文件", type=['xlsx', 'xls', 'csv'])
- query_tms_for_location = st.checkbox("查询 TMS 找影片所在影厅")
-
-
- if uploaded_file is not None:
-     try:
-         df = pd.read_excel(uploaded_file, skiprows=3, header=None)
-         df.rename(columns={0: '影片名称', 2: '放映时间', 5: '总人次', 6: '总收入', 7: '座位数'}, inplace=True)
-         required_cols = ['影片名称', '放映时间', '座位数', '总收入', '总人次']
-         df = df[required_cols]
-         df.dropna(subset=['影片名称', '放映时间'], inplace=True)
-         for col in ['座位数', '总收入', '总人次']:
-             df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)
-         df['放映时间'] = pd.to_datetime(df['放映时间'], format='%H:%M:%S', errors='coerce').dt.time
-         df.dropna(subset=['放映时间'], inplace=True)
-         df['影片名称_清理后'] = df['影片名称'].apply(clean_movie_title)
-         st.toast("文件上传成功,效率分析已生成!", icon="🎉")
-         format_config = {'座位数': '{:,.0f}', '场次': '{:,.0f}', '人次': '{:,.0f}', '票房': '{:,.2f}', '均价': '{:.2f}',
-                          '座次比': '{:.2%}', '场次比': '{:.2%}', '票房比': '{:.2%}', '座次效率': '{:.2f}',
-                          '场次效率': '{:.2f}'}
-
-         full_day_analysis = process_and_analyze_data(df.copy())
-         prime_time_analysis = process_and_analyze_data(df[df['放映时间'].between(pd.to_datetime('14:00:00').time(), pd.to_datetime('21:00:00').time())].copy())
-
-         if query_tms_for_location:
-             with st.spinner("正在关联查询 TMS 服务器..."):
-                 _, tms_movie_list = fetch_and_process_server_movies()
-                 full_day_analysis = add_tms_locations_to_analysis(full_day_analysis, tms_movie_list)
-                 prime_time_analysis = add_tms_locations_to_analysis(prime_time_analysis, tms_movie_list)
-
-                 # FIX 1: Reorder columns
-                 if '影片所在影厅位置' in full_day_analysis.columns:
-                     cols_full = full_day_analysis.columns.tolist()
-                     cols_full.insert(1, cols_full.pop(cols_full.index('影片所在影厅位置')))
-                     full_day_analysis = full_day_analysis[cols_full]
-
-                 if '影片所在影厅位置' in prime_time_analysis.columns:
-                     cols_prime = prime_time_analysis.columns.tolist()
-                     cols_prime.insert(1, cols_prime.pop(cols_prime.index('影片所在影厅位置')))
-                     prime_time_analysis = prime_time_analysis[cols_prime]
-
-                 st.toast("TMS 影片位置关联成功!", icon="🔗")
-
-         st.markdown("### 全天排片效率分析")
-         if not full_day_analysis.empty:
-             st.dataframe(
-                 full_day_analysis.style.format(format_config),
-                 use_container_width=True, hide_index=True)
-
-         st.markdown("#### 黄金时段排片效率分析 (14:00-21:00)")
-         if not prime_time_analysis.empty:
-             st.dataframe(
-                 prime_time_analysis.style.format(format_config),
-                 use_container_width=True, hide_index=True)
-
-         if not full_day_analysis.empty:
-             st.markdown("##### 复制当日排片列表")
-             movie_titles = full_day_analysis['影片'].tolist()
-             formatted_titles = ''.join([f'《{title}》' for title in movie_titles])
-             st.code(formatted_titles, language='text')
-
-     except Exception as e:
-         st.error(f"处理文件时出错: {e}")
-
-
- st.divider()
- st.markdown("### TMS 服务器影片内容查询")
- if st.button('点击查询 TMS 服务器'):
-     with st.spinner("正在从 TMS 服务器获取数据中..."):
-         try:
-             halls_data, movie_list_sorted = fetch_and_process_server_movies()
-             st.toast("TMS 服务器数据获取成功!", icon="🎉")
-
-             st.markdown("#### 按影片查看所在影厅")
-             view2_data = [{'影片名称': item['assert_name'], '所在影厅': " ".join(sorted([get_circled_number(h) for h in item['halls']])), '文件名': item['content_name'], '时长': format_play_time(item['play_time'])} for item in movie_list_sorted]
-             df_view2 = pd.DataFrame(view2_data)
-             st.dataframe(df_view2, hide_index=True, use_container_width=True)
-
-             st.markdown("#### 按影厅查看影片内容")
-             hall_tabs = st.tabs(halls_data.keys())
-             for tab, hall_name in zip(hall_tabs, halls_data.keys()):
-                 with tab:
-                     view1_data_for_tab = [{'影片名称': item['details']['assert_name'], '所在影厅': " ".join(sorted([get_circled_number(h) for h in item['details']['halls']])), '文件名': item['content_name'], '时长': format_play_time(item['details']['play_time'])} for item in halls_data[hall_name]]
-                     df_view1_tab = pd.DataFrame(view1_data_for_tab)
-                     st.dataframe(df_view1_tab, hide_index=True, use_container_width=True)
-
-         except Exception as e:
-             st.error(f"查询服务器时出错: {e}")
-
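For reference, the efficiency columns removed above are ratios of shares: 座次效率 = 票房比 / 座次比 and 场次效率 = 票房比 / 场次比, with values above 1.5 shaded green and below 0.5 shaded red by style_efficiency. A minimal sketch with made-up numbers (not taken from the commit):

import numpy as np

revenue_share = 0.36   # hypothetical: film takes 36% of the day's box office (票房比)
seat_share = 0.20      # ...while getting 20% of the scheduled seats (座次比)
session_share = 0.30   # ...and 30% of the sessions (场次比)

seat_efficiency = np.divide(revenue_share, seat_share)        # 1.8 -> above 1.5, shaded green
session_efficiency = np.divide(revenue_share, session_share)  # 1.2 -> between the thresholds, unshaded
print(seat_efficiency, session_efficiency)
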
  import streamlit as st
  import pandas as pd
+ import re
+
+ st.set_page_config(layout="wide")
+
+ st.title('影片放映时间表分析')
+
+ # 1. File upload widgets
+ uploaded_file = st.file_uploader("上传“影片放映时间表.xlsx”文件", type=['xlsx'])
+ ad_duration = st.number_input('输入每个广告的时长(分钟)', min_value=0, value=9)
+
+ if uploaded_file is not None:
      try:
+         # Read the Excel file
+         df = pd.read_excel(uploaded_file, header=3)
+
+         # --- Bug fix ---
+         # Explicitly cast the '影片' column to string to avoid mixed-type errors
+         df['影片'] = df['影片'].astype(str)
+
+         st.subheader('上传的原始数据')
+         st.dataframe(df)
+
+         # 2. Data processing and cleaning
+         # Clean the '影厅' column
+         def clean_hall_name(name):
+             if isinstance(name, str):
+                 match = re.search(r'【(\d+)号', name)
+                 if match:
+                     return f"{match.group(1)}号厅"
+             return name
+
+         df['影厅'] = df['影厅'].apply(clean_hall_name)
+
+         # Convert '放映日期' to datetime objects
+         df['放映日期'] = pd.to_datetime(df['放映日期'])
+         df['日期'] = df['放映日期'].dt.strftime('%m月%d日')
+
+         # Drop rows with missing values in '影厅' or '片长'
+         df.dropna(subset=['影厅', '片长'], inplace=True)
+
+         # 3. Count films and total runtime per hall per day
+         summary = df.groupby(['日期', '影厅']).agg(
+             影片数量=('影片', 'count'),
+             影片播放时长=('片长', 'sum')
+         ).reset_index()
+
+         # Compute advertisement time
+         summary['广告时长'] = summary['影片数量'] * ad_duration
+
+         # 4. Build a pivot table for the final display
+         pivot_table = summary.pivot_table(
+             index='日期',
+             columns='影厅',
+             values=['广告时长', '影片播放时长']
+         )
+
+         # Fill all blank (NaN) values with 0
+         pivot_table = pivot_table.fillna(0)
+
+         # Cast values to integers for a tidier table
+         pivot_table = pivot_table.astype(int)
+
+         # Swap the column levels and sort to get the desired output format
+         if not pivot_table.empty:
+             pivot_table = pivot_table.swaplevel(0, 1, axis=1).sort_index(axis=1)
+
+         st.subheader('影厅播放统计')
+         st.dataframe(pivot_table)
+
+     except Exception as e:
+         st.error(f"处理文件时出错: {e}")
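Below is a minimal, self-contained sketch (not part of the commit) of what the new groupby → pivot_table → swaplevel pipeline produces. The toy rows and values are invented purely for illustration; the column names match those used in app.py, and ad_duration mirrors the app's default of 9 minutes per advertisement.

import pandas as pd

toy = pd.DataFrame({
    '日期': ['08月01日', '08月01日', '08月01日'],
    '影厅': ['1号厅', '1号厅', '2号厅'],
    '影片': ['A', 'B', 'C'],
    '片长': [120, 90, 100],
})
ad_duration = 9  # minutes per advertisement, as in the app's default

# Same aggregation as the app: films per hall per day and total runtime
summary = toy.groupby(['日期', '影厅']).agg(
    影片数量=('影片', 'count'),
    影片播放时长=('片长', 'sum'),
).reset_index()
summary['广告时长'] = summary['影片数量'] * ad_duration

pivot = summary.pivot_table(index='日期', columns='影厅',
                            values=['广告时长', '影片播放时长']).fillna(0).astype(int)
# swaplevel + sort_index groups both metrics under each hall, so the columns read
# ('1号厅', '广告时长'), ('1号厅', '影片播放时长'), ('2号厅', '广告时长'), ...
pivot = pivot.swaplevel(0, 1, axis=1).sort_index(axis=1)
print(pivot)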