use std::error::Error; use oxc::{ allocator::{Allocator, StringBuilder}, ast::ast::{ AssignmentExpression, AssignmentTarget, CallExpression, DebuggerStatement, ExportAllDeclaration, ExportNamedDeclaration, Expression, FunctionBody, IdentifierReference, ImportDeclaration, ImportExpression, MemberExpression, MetaProperty, NewExpression, ObjectExpression, ObjectPropertyKind, ReturnStatement, StringLiteral, ThisExpression, UnaryExpression, UnaryOperator, UpdateExpression, }, ast_visit::{Visit, walk}, span::{Atom, GetSpan, Span}, }; use crate::{ cfg::{Config, Flags, UrlRewriter}, changes::JsChanges, rewrite::rewrite, }; // js MUST not be able to get a reference to any of these because sbx // // maybe move this out of this lib? const UNSAFE_GLOBALS: &[&str] = &[ "window", "self", "globalThis", "this", "parent", "top", "location", "document", "eval", "frames", ]; pub struct Visitor<'alloc, 'data, E> where E: UrlRewriter, { pub alloc: &'alloc Allocator, pub jschanges: JsChanges<'alloc, 'data>, pub error: Option>, pub config: &'data Config, pub rewriter: &'data E, pub flags: Flags, } impl<'data, E> Visitor<'_, 'data, E> where E: UrlRewriter, { fn rewrite_url(&mut self, url: &StringLiteral<'data>, module: bool) { let mut builder = StringBuilder::from_str_in(&self.config.prefix, self.alloc); if self.error.is_some() { builder.push_str("__URL_REWRITER_ALREADY_ERRORED__"); } else if let Err(err) = self.rewriter .rewrite(self.config, &self.flags, &url.value, &mut builder, module) { self.error.replace(err); builder.push_str("__URL_REWRITER_ERROR__"); } let text = builder.into_str(); self.jschanges .add(rewrite!(url.span.shrink(1), Replace { text })); } fn rewrite_ident(&mut self, name: &Atom, span: Span) { if UNSAFE_GLOBALS.contains(&name.as_str()) { self.jschanges.add(rewrite!(span, WrapFn { wrap: true })); } } fn walk_member_expression(&mut self, it: &Expression) -> bool { match it { Expression::Identifier(s) => { self.rewrite_ident(&s.name, s.span); true } Expression::StaticMemberExpression(s) => self.walk_member_expression(&s.object), Expression::ComputedMemberExpression(s) => self.walk_member_expression(&s.object), _ => false, } } fn scramitize(&mut self, span: Span) { self.jschanges.add(rewrite!(span, Scramitize)); } } impl<'data, E> Visit<'data> for Visitor<'_, 'data, E> where E: UrlRewriter, { fn visit_identifier_reference(&mut self, it: &IdentifierReference) { // if self.config.capture_errors { // self.jschanges.insert(JsChange::GenericChange { // span: it.span, // text: format!( // "{}({}, typeof arguments != 'undefined' && arguments)", // self.config.wrapfn, it.name // ), // }); // } else { // if UNSAFE_GLOBALS.contains(&it.name.as_str()) { self.jschanges .add(rewrite!(it.span, WrapFn { wrap: false })); } // } } fn visit_new_expression(&mut self, it: &NewExpression<'data>) { self.walk_member_expression(&it.callee); walk::walk_arguments(self, &it.arguments); } fn visit_member_expression(&mut self, it: &MemberExpression<'data>) { // TODO // you could break this with ["postMessage"] etc // however this code only exists because of recaptcha whatever // and it would slow down js execution a lot if let MemberExpression::StaticMemberExpression(s) = it { if s.property.name == "postMessage" { self.jschanges.add(rewrite!(s.property.span, SetRealmFn)); walk::walk_expression(self, &s.object); return; // unwise to walk the rest of the tree } if !self.flags.strict_rewrites && !UNSAFE_GLOBALS.contains(&s.property.name.as_str()) && let Expression::Identifier(_) | Expression::ThisExpression(_) = &s.object { // cull tree - this should be safe return; } if self.flags.scramitize && !matches!(s.object, Expression::MetaProperty(_) | Expression::Super(_)) { self.scramitize(s.object.span()); } } walk::walk_member_expression(self, it); } fn visit_this_expression(&mut self, it: &ThisExpression) { self.jschanges.add(rewrite!(it.span, WrapThisFn)); } fn visit_debugger_statement(&mut self, it: &DebuggerStatement) { // delete debugger statements entirely. some sites will spam debugger as an anti-debugging measure, and we don't want that! self.jschanges.add(rewrite!(it.span, Delete)); } // we can't overwrite window.eval in the normal way because that would make everything an // indirect eval, which could break things. we handle that edge case here fn visit_call_expression(&mut self, it: &CallExpression<'data>) { if let Expression::Identifier(s) = &it.callee { // if it's optional that actually makes it an indirect eval which is handled separately if s.name == "eval" && !it.optional { self.jschanges.add(rewrite!( it.span, Eval { inner: Span::new(s.span.end + 1, it.span.end), } )); // then we walk the arguments, but not the callee, since we want it to resolve to // the real eval walk::walk_arguments(self, &it.arguments); return; } } if self.flags.scramitize { self.scramitize(it.span); } walk::walk_call_expression(self, it); } fn visit_import_declaration(&mut self, it: &ImportDeclaration<'data>) { self.rewrite_url(&it.source, true); walk::walk_import_declaration(self, it); } fn visit_import_expression(&mut self, it: &ImportExpression<'data>) { self.jschanges.add(rewrite!( Span::new(it.span.start, it.span.start + 7), ImportFn )); walk::walk_import_expression(self, it); } fn visit_export_all_declaration(&mut self, it: &ExportAllDeclaration<'data>) { self.rewrite_url(&it.source, true); } fn visit_export_named_declaration(&mut self, it: &ExportNamedDeclaration<'data>) { if let Some(source) = &it.source { self.rewrite_url(source, true); } // do not walk further, we don't want to rewrite the identifiers } #[cfg(feature = "debug")] fn visit_try_statement(&mut self, it: &oxc::ast::ast::TryStatement<'data>) { // for debugging we need to know what the error was if self.flags.capture_errors && let Some(h) = &it.handler && let Some(name) = &h.param && let Some(ident) = name.pattern.get_identifier_name() { let start = h.body.span.start + 1; self.jschanges .add(rewrite!(Span::new(start, start), ScramErr { ident })); } walk::walk_try_statement(self, it); } fn visit_object_expression(&mut self, it: &ObjectExpression<'data>) { for prop in &it.properties { if let ObjectPropertyKind::ObjectProperty(p) = prop && let Expression::Identifier(s) = &p.value && UNSAFE_GLOBALS.contains(&s.name.to_string().as_str()) && p.shorthand { self.jschanges .add(rewrite!(s.span, ShorthandObj { name: s.name })); return; } } walk::walk_object_expression(self, it); } fn visit_function_body(&mut self, it: &FunctionBody<'data>) { // tag function for use in sourcemaps if self.flags.do_sourcemaps { self.jschanges .add(rewrite!(Span::new(it.span.start, it.span.start), SourceTag)); } walk::walk_function_body(self, it); } fn visit_return_statement(&mut self, it: &ReturnStatement<'data>) { // if let Some(arg) = &it.argument { // self.jschanges.insert(JsChange::GenericChange { // span: Span::new(it.span.start + 6, it.span.start + 6), // text: format!(" $scramdbg((()=>{{ try {{return arguments}} catch(_){{}} }})(),("), // }); // self.jschanges.insert(JsChange::GenericChange { // span: Span::new(expression_span(arg).end, expression_span(arg).end), // text: format!("))"), // }); // } walk::walk_return_statement(self, it); } fn visit_unary_expression(&mut self, it: &UnaryExpression<'data>) { if matches!(it.operator, UnaryOperator::Typeof) { // don't walk to identifier rewrites since it won't matter return; } walk::walk_unary_expression(self, it); } fn visit_update_expression(&mut self, _it: &UpdateExpression<'data>) { // then no, don't walk it, we don't care } fn visit_meta_property(&mut self, it: &MetaProperty<'data>) { if it.meta.name == "import" { self.jschanges.add(rewrite!(it.span, MetaFn)); } } fn visit_assignment_expression(&mut self, it: &AssignmentExpression<'data>) { match &it.left { AssignmentTarget::AssignmentTargetIdentifier(s) => { if ["location"].contains(&s.name.to_string().as_str()) { self.jschanges.add(rewrite!( it.span, Assignment { name: s.name, rhs: it.right.span(), op: it.operator, } )); // avoid walking rest of tree, i would need to figure out nested rewrites // somehow return; } } AssignmentTarget::ArrayAssignmentTarget(_) => { // [location] = ["https://example.com"] // this is such a ridiculously specific edge case. just ignore it return; } _ => { // only walk the left side if it isn't an identifier, we can't replace the // identifier with a function obviously walk::walk_assignment_target(self, &it.left); } } walk::walk_expression(self, &it.right); } }